Diffstat (limited to 'llvm/test/Transforms')
141 files changed, 1803 insertions(+), 4071 deletions(-)
diff --git a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll index 88eff97..0c2db4a 100644 --- a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll +++ b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll @@ -7,9 +7,9 @@ define i32 @nested(i32 %src) #0 { ; CHECK-SAME: i32 [[A0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[BB15160:.*:]] ; CHECK-NEXT: [[T1:%.*]] = call token @llvm.experimental.convergence.entry() -; CHECK-NEXT: %"vl77672llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor() -; CHECK-NEXT: %"op68297(vl77672)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl77672llvm.experimental.convergence.anchor()") ] -; CHECK-NEXT: ret i32 %"op68297(vl77672)" +; CHECK-NEXT: %"vl14659llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: %"op15516(vl14659)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl14659llvm.experimental.convergence.anchor()") ] +; CHECK-NEXT: ret i32 %"op15516(vl14659)" ; %t1 = call token @llvm.experimental.convergence.entry() %t2 = call token @llvm.experimental.convergence.anchor() diff --git a/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll b/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll index 35ac0fd..b9be105 100644 --- a/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll +++ b/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll @@ -8,18 +8,18 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"vl72693([[A1]], 1)" = add i32 [[A1]], 1 ; CHECK-NEXT: br label %[[BB16110:.*]] ; CHECK: [[BB16110]]: -; CHECK-NEXT: %"op10912(op18080, vl72693)" = phi i32 [ %"op18080(op10412, op17645)", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] -; CHECK-NEXT: %"op10912(op17645, vl72693)" = phi i32 [ %"op17645(op10912)70", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] -; CHECK-NEXT: %"op15084(op10912)" = mul i32 %"op10912(op18080, vl72693)", undef -; CHECK-NEXT: %"op16562(op15084)" = xor i32 -1, %"op15084(op10912)" -; CHECK-NEXT: %"op44627(op10912, op16562)" = add i32 %"op10912(op18080, vl72693)", %"op16562(op15084)" -; CHECK-NEXT: %"op17645(op10912)" = add i32 -1, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op17645, op44627)" = add i32 %"op17645(op10912)", %"op44627(op10912, op16562)" -; CHECK-NEXT: %"op17720(op15084, op18080)" = mul i32 %"op15084(op10912)", %"op18080(op17645, op44627)" -; CHECK-NEXT: %"op16562(op17720)" = xor i32 -1, %"op17720(op15084, op18080)" -; CHECK-NEXT: %"op17430(op16562, op18080)" = add i32 %"op16562(op17720)", %"op18080(op17645, op44627)" +; CHECK-NEXT: %"op81283(op18080, vl72693)" = phi i32 [ %"op18080(op10412, op18131)", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] +; CHECK-NEXT: %"op81283(op18131, vl72693)" = phi i32 [ %"op18131(op81283)70", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] +; CHECK-NEXT: %"op13219(op81283)" = mul i32 %"op81283(op18080, vl72693)", undef +; CHECK-NEXT: %"op16562(op13219)" = xor i32 -1, %"op13219(op81283)" +; CHECK-NEXT: %"op12556(op16562, op81283)" = add i32 %"op16562(op13219)", %"op81283(op18080, vl72693)" +; CHECK-NEXT: %"op18131(op81283)" = add i32 -1, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op12556, op18131)" = add i32 %"op12556(op16562, op81283)", %"op18131(op81283)" +; CHECK-NEXT: %"op17720(op13219, op18080)" = mul i32 
%"op13219(op81283)", %"op18080(op12556, op18131)" +; CHECK-NEXT: %"op16562(op17720)" = xor i32 -1, %"op17720(op13219, op18080)" +; CHECK-NEXT: %"op17430(op16562, op18080)" = add i32 %"op16562(op17720)", %"op18080(op12556, op18131)" ; CHECK-NEXT: %"op10412(op17430)" = add i32 %"op17430(op16562, op18080)", undef -; CHECK-NEXT: %"op17720(op10412, op17720)" = mul i32 %"op10412(op17430)", %"op17720(op15084, op18080)" +; CHECK-NEXT: %"op17720(op10412, op17720)" = mul i32 %"op10412(op17430)", %"op17720(op13219, op18080)" ; CHECK-NEXT: %"op16562(op17720)1" = xor i32 -1, %"op17720(op10412, op17720)" ; CHECK-NEXT: %"op17430(op10412, op16562)" = add i32 %"op10412(op17430)", %"op16562(op17720)1" ; CHECK-NEXT: %"op10412(op17430)2" = add i32 %"op17430(op10412, op16562)", undef @@ -45,11 +45,11 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"op17720(op10412, op17720)21" = mul i32 %"op10412(op17430)20", %"op17720(op10412, op17720)17" ; CHECK-NEXT: %"op16562(op17720)22" = xor i32 -1, %"op17720(op10412, op17720)21" ; CHECK-NEXT: %"op17430(op10412, op16562)23" = add i32 %"op10412(op17430)20", %"op16562(op17720)22" -; CHECK-NEXT: %"op17645(op10912)24" = add i32 -9, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op17430, op17645)" = add i32 %"op17430(op10412, op16562)23", %"op17645(op10912)24" -; CHECK-NEXT: %"op17720(op17720, op18080)" = mul i32 %"op17720(op10412, op17720)21", %"op18080(op17430, op17645)" +; CHECK-NEXT: %"op18131(op81283)24" = add i32 -9, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op17430, op18131)" = add i32 %"op17430(op10412, op16562)23", %"op18131(op81283)24" +; CHECK-NEXT: %"op17720(op17720, op18080)" = mul i32 %"op17720(op10412, op17720)21", %"op18080(op17430, op18131)" ; CHECK-NEXT: %"op16562(op17720)25" = xor i32 -1, %"op17720(op17720, op18080)" -; CHECK-NEXT: %"op17430(op16562, op18080)26" = add i32 %"op16562(op17720)25", %"op18080(op17430, op17645)" +; CHECK-NEXT: %"op17430(op16562, op18080)26" = add i32 %"op16562(op17720)25", %"op18080(op17430, op18131)" ; CHECK-NEXT: %"op10412(op17430)27" = add i32 %"op17430(op16562, op18080)26", undef ; CHECK-NEXT: %"op17720(op10412, op17720)28" = mul i32 %"op10412(op17430)27", %"op17720(op17720, op18080)" ; CHECK-NEXT: %"op16562(op17720)29" = xor i32 -1, %"op17720(op10412, op17720)28" @@ -66,11 +66,11 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"op17720(op10412, op17720)40" = mul i32 %"op10412(op17430)39", %"op17720(op10412, op17720)36" ; CHECK-NEXT: %"op16562(op17720)41" = xor i32 -1, %"op17720(op10412, op17720)40" ; CHECK-NEXT: %"op17430(op10412, op16562)42" = add i32 %"op10412(op17430)39", %"op16562(op17720)41" -; CHECK-NEXT: %"op17645(op10912)43" = add i32 -14, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op17430, op17645)44" = add i32 %"op17430(op10412, op16562)42", %"op17645(op10912)43" -; CHECK-NEXT: %"op17720(op17720, op18080)45" = mul i32 %"op17720(op10412, op17720)40", %"op18080(op17430, op17645)44" +; CHECK-NEXT: %"op18131(op81283)43" = add i32 -14, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op17430, op18131)44" = add i32 %"op17430(op10412, op16562)42", %"op18131(op81283)43" +; CHECK-NEXT: %"op17720(op17720, op18080)45" = mul i32 %"op17720(op10412, op17720)40", %"op18080(op17430, op18131)44" ; CHECK-NEXT: %"op16562(op17720)46" = xor i32 -1, %"op17720(op17720, op18080)45" -; CHECK-NEXT: %"op17430(op16562, op18080)47" = add i32 %"op16562(op17720)46", %"op18080(op17430, op17645)44" +; CHECK-NEXT: %"op17430(op16562, op18080)47" = add i32 %"op16562(op17720)46", %"op18080(op17430, op18131)44" ; 
CHECK-NEXT: %"op10412(op17430)48" = add i32 %"op17430(op16562, op18080)47", undef ; CHECK-NEXT: %"op17720(op10412, op17720)49" = mul i32 %"op10412(op17430)48", %"op17720(op17720, op18080)45" ; CHECK-NEXT: %"op16562(op17720)50" = xor i32 -1, %"op17720(op10412, op17720)49" @@ -93,9 +93,9 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"op17430(op10412, op16562)67" = add i32 %"op10412(op17430)64", %"op16562(op17720)66" ; CHECK-NEXT: %"op10412(op17430)68" = add i32 %"op17430(op10412, op16562)67", undef ; CHECK-NEXT: %"op10412(op10412)69" = add i32 %"op10412(op17430)68", undef -; CHECK-NEXT: %"op17645(op10912)70" = add i32 -21, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op10412, op17645)" = add i32 %"op10412(op10412)69", %"op17645(op10912)70" -; CHECK-NEXT: store i32 %"op18080(op10412, op17645)", ptr [[A0]], align 4 +; CHECK-NEXT: %"op18131(op81283)70" = add i32 -21, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op10412, op18131)" = add i32 %"op10412(op10412)69", %"op18131(op81283)70" +; CHECK-NEXT: store i32 %"op18080(op10412, op18131)", ptr [[A0]], align 4 ; CHECK-NEXT: br label %[[BB16110]] ; bb: diff --git a/llvm/test/Transforms/IRNormalizer/reordering-basic.ll b/llvm/test/Transforms/IRNormalizer/reordering-basic.ll index fd09ce0..06e67e0 100644 --- a/llvm/test/Transforms/IRNormalizer/reordering-basic.ll +++ b/llvm/test/Transforms/IRNormalizer/reordering-basic.ll @@ -28,16 +28,16 @@ define double @baz(double %x) { ; CHECK-SAME: double [[A0:%.*]]) { ; CHECK-NEXT: [[BB76951:.*:]] ; CHECK-NEXT: [[IFCOND:%.*]] = fcmp one double [[A0]], 0.000000e+00 -; CHECK-NEXT: br i1 [[IFCOND]], label %[[BB91455:.*]], label %[[BB914551:.*]] -; CHECK: [[BB91455]]: -; CHECK-NEXT: %"vl15001bir()" = call double @bir() +; CHECK-NEXT: br i1 [[IFCOND]], label %[[BB47054:.*]], label %[[BB470541:.*]] +; CHECK: [[BB47054]]: +; CHECK-NEXT: %"vl16994bir()" = call double @bir() ; CHECK-NEXT: br label %[[BB17254:.*]] -; CHECK: [[BB914551]]: -; CHECK-NEXT: %"vl69719bar()" = call double @bar() +; CHECK: [[BB470541]]: +; CHECK-NEXT: %"vl88592bar()" = call double @bar() ; CHECK-NEXT: br label %[[BB17254]] ; CHECK: [[BB17254]]: -; CHECK-NEXT: %"op19734(vl15001, vl69719)" = phi double [ %"vl15001bir()", %[[BB91455]] ], [ %"vl69719bar()", %[[BB914551]] ] -; CHECK-NEXT: ret double %"op19734(vl15001, vl69719)" +; CHECK-NEXT: %"op16411(vl16994, vl88592)" = phi double [ %"vl16994bir()", %[[BB47054]] ], [ %"vl88592bar()", %[[BB470541]] ] +; CHECK-NEXT: ret double %"op16411(vl16994, vl88592)" ; entry: %ifcond = fcmp one double %x, 0.000000e+00 diff --git a/llvm/test/Transforms/IRNormalizer/reordering.ll b/llvm/test/Transforms/IRNormalizer/reordering.ll index 64abe8e..a3dbcb5 100644 --- a/llvm/test/Transforms/IRNormalizer/reordering.ll +++ b/llvm/test/Transforms/IRNormalizer/reordering.ll @@ -23,7 +23,7 @@ declare void @effecting() ; Place dead instruction(s) before the terminator define void @call_effecting() { ; CHECK-LABEL: define void @call_effecting() { -; CHECK-NEXT: bb15160: +; CHECK-NEXT: bb14885: ; CHECK-NEXT: call void @effecting() ; CHECK-NEXT: [[TMP0:%.*]] = add i32 0, 1 ; CHECK-NEXT: ret void @@ -51,7 +51,7 @@ exit: define void @dont_move_above_alloca() { ; CHECK-LABEL: define void @dont_move_above_alloca() { -; CHECK-NEXT: bb15160: +; CHECK-NEXT: bb14885: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @effecting() ; CHECK-NEXT: ret void @@ -65,7 +65,7 @@ declare void @effecting1() define void @dont_reorder_effecting() { ; CHECK-LABEL: define void @dont_reorder_effecting() { -; 
CHECK-NEXT: bb10075: +; CHECK-NEXT: bb45003: ; CHECK-NEXT: call void @effecting() ; CHECK-NEXT: call void @effecting1() ; CHECK-NEXT: ret void @@ -79,7 +79,7 @@ declare void @effecting2(i32) define void @dont_reorder_effecting1() { ; CHECK-LABEL: define void @dont_reorder_effecting1() { -; CHECK-NEXT: bb10075: +; CHECK-NEXT: bb45003: ; CHECK-NEXT: [[ONE:%.*]] = add i32 1, 1 ; CHECK-NEXT: call void @effecting2(i32 [[ONE]]) ; CHECK-NEXT: [[TWO:%.*]] = add i32 2, 2 diff --git a/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll b/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll new file mode 100644 index 0000000..3fc7c59 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s +%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 } +%struct.float3 = type { float, float, float } + + +; ------------------------------------------------------------------------------ +; Test that we can propagate the align 16 to the load and store that are set to align 4 +; ------------------------------------------------------------------------------ + +define void @prop_align(ptr %v, ptr %vout) { +; CHECK-LABEL: define void @prop_align( +; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]]) { +; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16 +; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4 +; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4 +; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8 +; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8 +; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12 +; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4 +; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16 +; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16 +; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20 +; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4 +; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24 +; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8 +; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28 +; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4 +; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16 +; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4 +; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4 +; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8 +; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8 +; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12 +; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4 +; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16 +; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16 +; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds 
nuw i8, ptr [[VOUT]], i64 20 +; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4 +; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24 +; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8 +; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28 +; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4 +; CHECK-NEXT: ret void +; + %.unpack.unpack = load float, ptr %v, align 16 + %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4 + %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4 + %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8 + %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8 + %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12 + %.unpack2.unpack = load float, ptr %.elt1, align 4 + %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16 + %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4 + %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20 + %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4 + %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24 + %.unpack4 = load i32, ptr %.elt3, align 8 + %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28 + %.unpack6 = load i32, ptr %.elt5, align 4 + store float %.unpack.unpack, ptr %vout, align 16 + %vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4 + store float %.unpack.unpack8, ptr %vout.repack23, align 4 + %vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8 + store float %.unpack.unpack10, ptr %vout.repack25, align 8 + %vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12 + store float %.unpack2.unpack, ptr %vout.repack17, align 4 + %vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16 + store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4 + %vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20 + store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4 + %vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24 + store i32 %.unpack4, ptr %vout.repack19, align 8 + %vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28 + store i32 %.unpack6, ptr %vout.repack21, align 4 + ret void +} + +; ------------------------------------------------------------------------------ +; Test that alignment is not propagated from a source that does not dominate the destination +; ------------------------------------------------------------------------------ + +define void @no_prop_align(ptr %v, ptr %vout, i1 %cond) { +; CHECK-LABEL: define void @no_prop_align( +; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]] +; CHECK: [[BRANCH1]]: +; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16 +; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4 +; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4 +; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8 +; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8 +; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12 +; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4 +; CHECK-NEXT: br label %[[END:.*]] +; CHECK: [[BRANCH2]]: +; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = 
getelementptr inbounds nuw i8, ptr [[V]], i64 16 +; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4 +; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20 +; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4 +; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24 +; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8 +; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28 +; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4 +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; + br i1 %cond, label %branch1, label %branch2 + +branch1: + %.unpack.unpack = load float, ptr %v, align 16 + %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4 + %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4 + %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8 + %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8 + %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12 + %.unpack2.unpack = load float, ptr %.elt1, align 4 + br label %end + +branch2: + %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16 + %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4 + %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20 + %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4 + %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24 + %.unpack4 = load i32, ptr %.elt3, align 8 + %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28 + %.unpack6 = load i32, ptr %.elt5, align 4 + br label %end + +end: + ret void +} + +; ------------------------------------------------------------------------------ +; Test that we can propagate to/from negative offset GEPs +; ------------------------------------------------------------------------------ + +define void @prop_align_negative_offset(ptr %v) { +; CHECK-LABEL: define void @prop_align_negative_offset( +; CHECK-SAME: ptr [[V:%.*]]) { +; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16 +; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16 +; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16 +; CHECK-NEXT: ret void +; + %loadAligned= load float, ptr %v, align 16 + %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16 + %loadUnaligned = load float, ptr %gepNegative, align 4 + ret void +} + +define void @prop_align_negative_offset_2(ptr %v) { +; CHECK-LABEL: define void @prop_align_negative_offset_2( +; CHECK-SAME: ptr [[V:%.*]]) { +; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16 +; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16 +; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[V]], align 16 +; CHECK-NEXT: ret void +; + %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16 + %loadAligned = load float, ptr %gepNegative, align 16 + %loadUnaligned= load float, ptr %v, align 4 + ret void +} + +define void @prop_align_negative_offset_3(ptr %v) { +; CHECK-LABEL: define void @prop_align_negative_offset_3( +; CHECK-SAME: ptr [[V:%.*]]) { +; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16 +; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -8 +; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 8 +; CHECK-NEXT: ret void +; + %loadAligned= load float, ptr %v, align 16 + 
%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -8 + %loadUnaligned = load float, ptr %gepNegative, align 4 + ret void +} + +define void @prop_align_negative_offset_4(ptr %v) { +; CHECK-LABEL: define void @prop_align_negative_offset_4( +; CHECK-SAME: ptr [[V:%.*]]) { +; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16 +; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -20 +; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 4 +; CHECK-NEXT: ret void +; + %loadAligned= load float, ptr %v, align 16 + %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -20 + %loadUnaligned = load float, ptr %gepNegative, align 4 + ret void +} diff --git a/llvm/test/Transforms/InstCombine/2025-08-06-shufflevector-bitcast-vector-of-pointers.ll b/llvm/test/Transforms/InstCombine/2025-08-06-shufflevector-bitcast-vector-of-pointers.ll new file mode 100644 index 0000000..e778d92 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/2025-08-06-shufflevector-bitcast-vector-of-pointers.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +; Make sure that we don't crash when optimizing shufflevector of <N x ptr> with <1 x i32> mask followed by bitcast of <1 x ptr> to ptr + +define ptr @test(<3 x ptr> %vptr) { +; CHECK-LABEL: define ptr @test( +; CHECK-SAME: <3 x ptr> [[VPTR:%.*]]) { +; CHECK-NEXT: [[SV_EXTRACT:%.*]] = extractelement <3 x ptr> [[VPTR]], i64 0 +; CHECK-NEXT: ret ptr [[SV_EXTRACT]] +; + %SV = shufflevector <3 x ptr> %vptr, <3 x ptr> zeroinitializer, <1 x i32> zeroinitializer + %BC = bitcast <1 x ptr> %SV to ptr + ret ptr %BC +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index a8d9a0c..aa22252b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -9,19 +9,13 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> 
[[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -34,7 +28,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8> ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[NEXT_GEP]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -92,19 +86,13 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -117,7 +105,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8> ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[NEXT_GEP]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 4b895ae..6d16339 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -702,12 +702,6 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; PRED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]] -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] @@ -726,7 +720,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1) ; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double> ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[NEXT_GEP]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0 @@ -1242,9 +1236,6 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]] ; PRED-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 7 -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TMP15:%.*]] = sub i64 [[TMP0]], 8 ; PRED-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[TMP0]], 8 ; PRED-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 1ad1e426..6ff6bb4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -24,8 +24,6 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] ; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP18]] to i32 ; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP18]], [[CONV61]] @@ -43,7 +41,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr 
[[TMP34]], i64 [[TMP38]] ; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8 ; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -110,12 +108,6 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP7]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]] @@ -127,7 +119,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -149,7 +141,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> zeroinitializer, ptr [[TMP34]], i32 8, <vscale x 2 x i1> [[TMP23]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] @@ -232,12 +224,6 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP7]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 
[[TMP0]], [[TMP11]] @@ -249,7 +235,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -274,7 +260,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP37:%.*]] = ashr i64 [[TMP36]], 32 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP37]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr [[TMP38]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index ab008e7..6a592ed 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -17,16 +17,14 @@ define void @f1(ptr %A) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP4]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 
596a2ed..a8d0b37 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -80,8 +80,6 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 ; SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; SVE: vector.body: ; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -92,7 +90,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]] ; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> poison) ; SVE-NEXT: [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 8b354d9..3e417a0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -28,8 +28,6 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 16 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP13:%.*]] = trunc <vscale x 8 x i32> [[BROADCAST_SPLAT]] to <vscale x 8 x i16> @@ -60,7 +58,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP38]], i64 [[TMP42]] ; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP36]], ptr [[TMP38]], align 1 ; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP37]], ptr [[TMP43]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; DEFAULT-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: @@ -103,12 +101,6 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 -; PRED-NEXT: [[TMP8:%.*]] = sub i64 
[[TMP5]], 1 -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP8]] -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() @@ -132,7 +124,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP23:%.*]] = trunc <vscale x 16 x i16> [[TMP21]] to <vscale x 16 x i8> ; PRED-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP23]], ptr [[TMP26]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP15]]) ; PRED-NEXT: [[TMP28:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP29:%.*]] = extractelement <vscale x 16 x i1> [[TMP28]], i32 0 @@ -270,9 +262,6 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] ; PRED-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1 -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 @@ -441,9 +430,6 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] ; PRED-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TMP14:%.*]] = sub i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP14]], i64 0 @@ -635,9 +621,6 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] ; PRED-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 649be65..1c4b621 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -55,8 +55,6 @@ 
define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 @@ -67,7 +65,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP26]] -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 1, [[TMP21]] +; CHECK-NEXT: [[TMP27:%.*]] = mul i64 1, [[TMP17]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP27]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -84,7 +82,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1 ; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64> ; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP21]] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]] ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -193,8 +191,6 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 @@ -205,7 +201,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = 
add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP38]] -; CHECK-NEXT: [[TMP39:%.*]] = mul i64 1, [[TMP21]] +; CHECK-NEXT: [[TMP39:%.*]] = mul i64 1, [[TMP17]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP39]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -222,7 +218,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1 ; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64> ; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP21]] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]] ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index f069347..fa8d17c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -145,8 +145,6 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 ; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE-4-VLA: vector.body: ; INTERLEAVE-4-VLA-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -172,7 +170,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[TMP17]] = add <vscale x 4 x i32> [[VEC_PHI1]], [[WIDE_LOAD4]] ; INTERLEAVE-4-VLA-NEXT: [[TMP18]] = add <vscale x 4 x i32> [[VEC_PHI2]], [[WIDE_LOAD5]] ; INTERLEAVE-4-VLA-NEXT: [[TMP19]] = add <vscale x 4 x i32> [[VEC_PHI3]], [[WIDE_LOAD6]] -; INTERLEAVE-4-VLA-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; INTERLEAVE-4-VLA-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; INTERLEAVE-4-VLA-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-4-VLA-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE-4-VLA: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 5f5d326..a54a404 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -72,8 +72,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 16 ; CHECK-VS1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], 
[[N_MOD_VF]] -; CHECK-VS1-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-VS1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer ; CHECK-VS1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -84,7 +82,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP22]], align 1 ; CHECK-VS1-NEXT: [[TMP24:%.*]] = add <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-VS1-NEXT: store <vscale x 16 x i8> [[TMP24]], ptr [[TMP22]], align 1 -; CHECK-VS1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-VS1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; CHECK-VS1-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VS1-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VS1: [[MIDDLE_BLOCK]]: @@ -103,8 +101,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 8 ; CHECK-VS1-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]] ; CHECK-VS1-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] -; CHECK-VS1-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 8 ; CHECK-VS1-NEXT: [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-VS1-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer @@ -116,7 +112,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x i8>, ptr [[TMP33]], align 1 ; CHECK-VS1-NEXT: [[TMP35:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]] ; CHECK-VS1-NEXT: store <vscale x 8 x i8> [[TMP35]], ptr [[TMP33]], align 1 -; CHECK-VS1-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX5]], [[TMP31]] +; CHECK-VS1-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX5]], [[TMP29]] ; CHECK-VS1-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-VS1-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VS1: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -176,8 +172,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 ; CHECK-VS2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] -; CHECK-VS2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 8 ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-VS2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-VS2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -188,7 +182,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[WIDE_LOAD:%.*]] = load 
<vscale x 8 x i8>, ptr [[TMP22]], align 1 ; CHECK-VS2-NEXT: [[TMP24:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-VS2-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP22]], align 1 -; CHECK-VS2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-VS2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; CHECK-VS2-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VS2-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VS2: [[MIDDLE_BLOCK]]: @@ -207,8 +201,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4 ; CHECK-VS2-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]] ; CHECK-VS2-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] -; CHECK-VS2-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 4 ; CHECK-VS2-NEXT: [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-VS2-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer @@ -220,7 +212,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i8>, ptr [[TMP33]], align 1 ; CHECK-VS2-NEXT: [[TMP35:%.*]] = add <vscale x 4 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]] ; CHECK-VS2-NEXT: store <vscale x 4 x i8> [[TMP35]], ptr [[TMP33]], align 1 -; CHECK-VS2-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX5]], [[TMP31]] +; CHECK-VS2-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX5]], [[TMP29]] ; CHECK-VS2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-VS2-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VS2: [[VEC_EPILOG_MIDDLE_BLOCK]]: @@ -416,12 +408,6 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer @@ -434,7 +420,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP13]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison) ; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP13]], i32 1, 
<vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]]) ; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 16 x i1> [[TMP16]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 6029095..32a69f1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -18,8 +18,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFNONE: [[VECTOR_BODY]]: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -28,7 +26,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: @@ -141,8 +139,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFNONE: [[VECTOR_BODY]]: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -153,7 +149,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> zeroinitializer ; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP10]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: @@ -168,10 +164,10 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP12]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END]] ; TFNONE: [[IF_THEN]]: -; TFNONE-NEXT: [[TMP13:%.*]] = call i64 
@foo(i64 [[TMP12]]) #[[ATTR4]] +; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR4]] ; TFNONE-NEXT: br label %[[IF_END]] ; TFNONE: [[IF_END]]: -; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP13]], %[[IF_THEN]] ], [ 0, %[[FOR_BODY]] ] +; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP9]], %[[IF_THEN]] ], [ 0, %[[FOR_BODY]] ] ; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[TMP14]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -292,8 +288,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFNONE: [[VECTOR_BODY]]: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -306,7 +300,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP9]] ; TFNONE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP11]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: @@ -460,8 +454,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFNONE: [[VECTOR_BODY]]: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -470,7 +462,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]]) ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: @@ -515,8 +507,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFFALLBACK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]] ; TFFALLBACK: [[VECTOR_BODY]]: ; 
TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -525,7 +515,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]]) ; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 -; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFFALLBACK-NEXT: br i1 [[TMP9]], label %[[FOR_BODY:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFFALLBACK: [[FOR_BODY]]: @@ -589,8 +579,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFNONE: [[VECTOR_BODY]]: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -599,7 +587,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]]) ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: @@ -737,8 +725,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M]], i64 0 ; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -753,7 +739,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; TFNONE-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP7]]) -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: @@ -926,8 +912,6 @@ define void @test_widen_exp_v2(ptr noalias %p2, 
ptr noalias %p, i64 %n) #5 { ; TFNONE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; TFNONE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFNONE: [[VECTOR_BODY]]: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -942,7 +926,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFNONE-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1 ; TFNONE-NEXT: [[TMP14:%.*]] = extractelement <vscale x 2 x double> [[PREDPHI]], i32 [[TMP13]] ; TFNONE-NEXT: store double [[TMP14]], ptr [[P]], align 8 -; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TFNONE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TFNONE: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index cc36cdb..3ab7171 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -495,12 +495,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; DEFAULT-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; DEFAULT-NEXT: [[N_RND_UP:%.*]] = add i64 15, [[TMP2]] -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 ; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] @@ -516,7 +510,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8() ; DEFAULT-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1) ; DEFAULT-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]] -; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 +; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; DEFAULT-NEXT: [[TMP13:%.*]] = mul i8 1, [[TMP12]] ; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP13]], i64 0 ; DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer @@ -534,7 +528,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]] ; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; DEFAULT-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; DEFAULT-NEXT: 
[[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; DEFAULT-NEXT: [[TMP24:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]] @@ -570,12 +564,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; OPTSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; OPTSIZE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; OPTSIZE-NEXT: [[N_RND_UP:%.*]] = add i64 15, [[TMP2]] -; OPTSIZE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; OPTSIZE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; OPTSIZE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; OPTSIZE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 ; OPTSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; OPTSIZE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; OPTSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] @@ -591,7 +579,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8() ; OPTSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1) ; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]] -; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 +; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; OPTSIZE-NEXT: [[TMP13:%.*]] = mul i8 1, [[TMP12]] ; OPTSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP13]], i64 0 ; OPTSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer @@ -609,7 +597,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]] ; OPTSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; OPTSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; OPTSIZE-NEXT: [[TMP24:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]] @@ -645,12 +633,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; MINSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; MINSIZE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; MINSIZE-NEXT: [[N_RND_UP:%.*]] = add i64 15, [[TMP2]] -; MINSIZE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; MINSIZE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; MINSIZE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; MINSIZE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 ; MINSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; MINSIZE-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; MINSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]] @@ -666,7 +648,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 
%b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8() ; MINSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1) ; MINSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]] -; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 +; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 ; MINSIZE-NEXT: [[TMP13:%.*]] = mul i8 1, [[TMP12]] ; MINSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP13]], i64 0 ; MINSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer @@ -684,7 +666,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]] ; MINSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; MINSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; MINSIZE-NEXT: [[TMP24:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 2cec6ca..f284afc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -18,38 +18,36 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP3]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_LATCH]] ] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP7]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) ; CHECK-NEXT: br label [[INNER_LOOP1:%.*]] ; CHECK: inner_loop1: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP11:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) -; CHECK-NEXT: [[TMP11]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] -; CHECK-NEXT: [[TMP12]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], splat (i64 1) -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i64> [[TMP12]], splat (i64 512) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP10:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP9:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) +; CHECK-NEXT: [[TMP9]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] +; CHECK-NEXT: [[TMP10]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <vscale x 4 x i64> [[TMP10]], splat (i64 512) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x i1> [[TMP11]], i32 0 +; CHECK-NEXT: br i1 [[TMP12]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP11]], [[INNER_LOOP1]] ] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[INNER_LOOP1]] ] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x 
float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP7]], i32 4, <vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index 787d63c..7232fe5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -61,8 +61,6 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -80,7 +78,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP16]] ; CHECK-SVE-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP19]] = sub <vscale x 4 x i32> [[TMP17]], [[TMP18]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-SVE: middle.block: @@ -104,8 +102,6 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -124,7 +120,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = sub <vscale x 8 x i32> zeroinitializer, [[TMP17]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP18]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -217,8 +213,6 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -236,7 +230,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP16]] ; CHECK-SVE-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP19]] = add <vscale x 4 x i32> [[TMP17]], [[TMP18]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-SVE: middle.block: @@ -260,8 +254,6 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -279,7 +271,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP16]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP17]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -373,8 +365,6 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; 
CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -392,7 +382,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP17:%.*]] = sub <vscale x 4 x i32> [[VEC_PHI]], [[TMP16]] ; CHECK-SVE-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP19]] = add <vscale x 4 x i32> [[TMP17]], [[TMP18]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-SVE: middle.block: @@ -416,8 +406,6 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -436,7 +424,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP18]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -533,8 +521,6 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -552,7 +538,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP17:%.*]] = sub <vscale x 4 x i32> [[VEC_PHI]], [[TMP16]] ; CHECK-SVE-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP19]] = sub <vscale x 4 x i32> [[TMP17]], [[TMP18]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP20:%.*]] = icmp eq 
i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-SVE: middle.block: @@ -576,8 +562,6 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -597,7 +581,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = sub <vscale x 8 x i32> zeroinitializer, [[TMP18]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP19]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -695,8 +679,6 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -716,7 +698,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[TMP18]] ; CHECK-SVE-NEXT: [[TMP20:%.*]] = mul nsw <vscale x 4 x i32> [[TMP14]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP21]] = add <vscale x 4 x i32> [[TMP19]], [[TMP20]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-SVE: middle.block: @@ -740,8 +722,6 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -761,7 +741,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 8 x i32> [[TMP14]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE4]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE3]], <vscale x 8 x i32> [[TMP18]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -863,8 +843,6 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -884,7 +862,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[TMP18]] ; CHECK-SVE-NEXT: [[TMP20:%.*]] = mul nsw <vscale x 4 x i32> [[TMP14]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP21]] = sub <vscale x 4 x i32> [[TMP19]], [[TMP20]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-SVE: middle.block: @@ -908,8 +886,6 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -931,7 +907,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = mul nsw <vscale x 8 x i32> [[TMP14]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = sub <vscale x 8 x i32> zeroinitializer, [[TMP19]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE4]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE3]], <vscale x 8 x i32> [[TMP20]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 
[[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -1029,8 +1005,6 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1047,7 +1021,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP16:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP14]] ; CHECK-SVE-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP16]] ; CHECK-SVE-NEXT: [[TMP18]] = add <vscale x 4 x i32> [[TMP17]], [[TMP15]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-SVE: middle.block: @@ -1071,8 +1045,6 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1089,7 +1061,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP14]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP16]]) ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP15]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -1177,8 +1149,6 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; 
CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1191,7 +1161,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-NEXT: [[TMP12:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; CHECK-SVE-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP11]] ; CHECK-SVE-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[TMP12]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-SVE: middle.block: @@ -1215,8 +1185,6 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1229,7 +1197,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP12:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP11]]) ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE2]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP12]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: @@ -1318,8 +1286,6 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE: vector.body: ; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1336,7 +1302,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP15]] ; CHECK-SVE-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 4 x i32> [[TMP13]], [[TMP14]] ; CHECK-SVE-NEXT: [[TMP18]] = add <vscale x 4 x i32> 
[[TMP16]], [[TMP17]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-SVE: middle.block: @@ -1360,8 +1326,6 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-SVE-MAXBW: vector.body: ; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1378,7 +1342,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP15]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw <vscale x 8 x i32> [[TMP13]], [[TMP14]] ; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]], <vscale x 8 x i32> [[TMP16]]) -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-SVE-MAXBW: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index d01effd..368cb18e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -17,8 +17,6 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -44,7 +42,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP12]] ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP20]]) ; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP21]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -65,8 +63,6 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NOI8MM-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NOI8MM-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-NOI8MM-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-NOI8MM: vector.body:
; CHECK-NOI8MM-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -92,7 +88,7 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP12]]
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP20]])
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP21]])
-; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NOI8MM-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NOI8MM-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NOI8MM: middle.block:
@@ -136,8 +132,6 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -163,7 +157,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP12]]
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP20]])
; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP21]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
@@ -184,8 +178,6 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NOI8MM-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NOI8MM-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-NOI8MM-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-NOI8MM: vector.body:
; CHECK-NOI8MM-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -211,7 +203,7 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP12]]
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP20]])
; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP21]])
-; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NOI8MM-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NOI8MM-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NOI8MM: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 195101f..0f0713d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -18,8 +18,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT1:%.*]], [[VECTOR_BODY]] ]
@@ -32,7 +30,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP18]], [[TMP9]]
; CHECK-INTERLEAVE1-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[INDEX1]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[INDEX1]], [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT1]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -52,8 +50,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT1:%.*]], [[VECTOR_BODY]] ]
@@ -79,7 +75,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul <vscale x 4 x i32> [[TMP29]], [[TMP13]]
; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP30]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI1]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[INDEX1]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[INDEX1]], [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT1]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -100,8 +96,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -114,7 +108,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32>
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul <vscale x 8 x i32> [[TMP20]], [[TMP13]]
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP22]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -157,8 +151,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP9]], 2
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP18]], 2
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -173,7 +165,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD2]] to <vscale x 2 x i64>
; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP13]], [[TMP11]]
; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = add <vscale x 2 x i64> [[TMP14]], [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -193,8 +185,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], 4
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP28]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -222,7 +212,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP21]], [[TMP15]]
; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 2 x i64> [[TMP22]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP25]] = add <vscale x 2 x i64> [[TMP23]], [[VEC_PHI1]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP10]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -243,8 +233,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -259,7 +247,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i64>
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP13]], [[TMP11]]
; CHECK-MAXBW-NEXT: [[TMP15]] = add <vscale x 8 x i64> [[TMP14]], [[VEC_PHI]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -304,8 +292,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP14]], 2
; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul i64 [[TMP10]], 2
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP20]]
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = mul i64 [[TMP10]], 2
@@ -324,7 +310,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = zext <vscale x 2 x i16> [[WIDE_LOAD3]] to <vscale x 2 x i64>
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP15]], [[TMP13]]
; CHECK-INTERLEAVE1-NEXT: [[TMP17]] = add <vscale x 2 x i64> [[TMP16]], [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -344,8 +330,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP11]]
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP12]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP15]], 2
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 2
@@ -377,7 +361,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP23]], [[TMP17]]
; CHECK-INTERLEAVED-NEXT: [[TMP26]] = add <vscale x 2 x i64> [[TMP24]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP27]] = add <vscale x 2 x i64> [[TMP25]], [[VEC_PHI1]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP15]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -398,8 +382,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 2
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 2
@@ -418,7 +400,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD3]] to <vscale x 4 x i64>
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = mul nuw nsw <vscale x 4 x i64> [[TMP15]], [[TMP13]]
; CHECK-MAXBW-NEXT: [[TMP17]] = add <vscale x 4 x i64> [[TMP16]], [[VEC_PHI]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -819,8 +801,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -838,7 +818,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP16]] = mul <vscale x 8 x i32> [[TMP15]], [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[VECTOR_RECUR]], <vscale x 8 x i32> [[TMP16]], i32 -1)
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP17]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -865,8 +845,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -890,7 +868,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = mul <vscale x 8 x i32> [[TMP23]], [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[TMP24]], <vscale x 8 x i32> [[TMP25]], i32 -1)
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -917,8 +895,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 8
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -936,7 +912,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP25]] = mul <vscale x 8 x i32> [[TMP23]], [[TMP16]]
; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[VECTOR_RECUR]], <vscale x 8 x i32> [[TMP25]], i32 -1)
; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -986,8 +962,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -999,7 +973,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = mul <vscale x 8 x i32> [[TMP15]], [[TMP12]]
; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP15]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -1026,8 +1000,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1045,7 +1017,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul <vscale x 8 x i32> [[TMP22]], [[TMP15]]
; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP30]], [[TMP22]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -1072,8 +1044,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1085,7 +1055,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP14]]
; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP20]], [[TMP19]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -1136,8 +1106,6 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP16]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP18]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP20]], 4
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1180,7 +1148,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD10]] to <vscale x 4 x i32>
; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = mul nsw <vscale x 4 x i32> [[TMP37]], [[TMP39]]
; CHECK-INTERLEAVE1-NEXT: [[TMP41]] = add <vscale x 4 x i32> [[TMP40]], [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -1204,8 +1172,6 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP16]], 8
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP18]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP34]], 8
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1300,7 +1266,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = mul nsw <vscale x 4 x i32> [[TMP71]], [[TMP77]]
; CHECK-INTERLEAVED-NEXT: [[TMP80]] = add <vscale x 4 x i32> [[TMP78]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP81]] = add <vscale x 4 x i32> [[TMP79]], [[VEC_PHI1]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -1328,8 +1294,6 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1372,7 +1336,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD26]] to <vscale x 8 x i32>
; CHECK-MAXBW-NEXT: [[TMP73:%.*]] = mul nsw <vscale x 8 x i32> [[TMP65]], [[TMP71]]
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI4]], <vscale x 8 x i32> [[TMP73]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP74:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -1445,12 +1409,6 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: entry:
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-INTERLEAVE1-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
-; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
@@ -1488,12 +1446,6 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: entry:
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-INTERLEAVED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
-; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
@@ -1531,12 +1483,6 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: entry:
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-MAXBW-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
-; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
@@ -1603,8 +1549,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP8]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1617,7 +1561,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP12]], [[TMP9]]
; CHECK-INTERLEAVE1-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -1641,8 +1585,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP7]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP14]], 8
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1668,7 +1610,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul <vscale x 4 x i32> [[TMP20]], [[TMP13]]
; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP21]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI1]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -1693,8 +1635,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1707,7 +1647,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32>
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul <vscale x 8 x i32> [[TMP20]], [[TMP13]]
; CHECK-MAXBW-NEXT: [[TMP24]] = add <vscale x 8 x i32> [[TMP22]], [[VEC_PHI1]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -1755,8 +1695,6 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1770,7 +1708,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD1]] to <vscale x 2 x i64>
; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP13]], [[TMP9]]
; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = add <vscale x 2 x i64> [[VEC_PHI]], [[TMP14]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -1791,8 +1729,6 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1819,7 +1755,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP21]], [[TMP13]]
; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 2 x i64> [[VEC_PHI]], [[TMP22]]
; CHECK-INTERLEAVED-NEXT: [[TMP25]] = add <vscale x 2 x i64> [[VEC_PHI1]], [[TMP23]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -1841,8 +1777,6 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1853,14 +1787,14 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDEX]], 1
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]]
; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1
-; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
-; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP12]], [[TMP9]]
-; CHECK-MAXBW-NEXT: [[TMP14]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP15]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-MAXBW-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP8]], [[TMP9]]
+; CHECK-MAXBW-NEXT: [[TMP14]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP13]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-MAXBW: middle.block:
-; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP14]])
+; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP14]])
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 41, [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-MAXBW: scalar.ph:
@@ -2150,8 +2084,6 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EXT_B]], i64 0
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -2163,7 +2095,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i64>
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[TMP9]], [[BROADCAST_SPLAT]]
; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -2285,8 +2217,6 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EXT_B]], i64 0
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -2298,7 +2228,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i64>
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -2349,8 +2279,6 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
@@ -2367,7 +2295,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD2]] to <vscale x 2 x i64>
; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP16]], [[TMP14]]
; CHECK-INTERLEAVE1-NEXT: [[TMP18]] = add <vscale x 2 x i64> [[TMP17]], [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -2392,8 +2320,6 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
@@ -2423,7 +2349,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP24]], [[TMP18]]
; CHECK-INTERLEAVED-NEXT: [[TMP27]] = add <vscale x 2 x i64> [[TMP25]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP28]] = add <vscale x 2 x i64> [[TMP26]], [[VEC_PHI1]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -2449,8 +2375,6 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
@@ -2467,7 +2391,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i64>
; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP16]], [[TMP14]]
; CHECK-MAXBW-NEXT: [[TMP20]] = add <vscale x 8 x i64> [[TMP17]], [[VEC_PHI]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK-MAXBW: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
index a46340c..17da2af 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
@@ -18,8 +18,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -33,7 +31,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP12]], [[TMP9]]
; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = sub <vscale x 4 x i32> zeroinitializer, [[TMP13]]
; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP14]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
@@ -53,8 +51,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -82,7 +78,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = sub <vscale x 4 x i32> zeroinitializer, [[TMP22]]
; CHECK-INTERLEAVED-NEXT: [[TMP25]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP23]]
; CHECK-INTERLEAVED-NEXT: [[TMP26]] = add <vscale x 4 x i32> [[VEC_PHI1]], [[TMP24]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
@@ -103,8 +99,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -118,7 +112,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i32> [[TMP12]], [[TMP9]]
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = sub <vscale x 8 x i32> zeroinitializer, [[TMP13]]
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP14]])
-; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-MAXBW: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
index d2c03d1..025a826 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
@@ -68,8 +68,6 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -78,7 +76,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP6]], align 1
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP8]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -254,8 +252,6 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -264,7 +260,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i64>
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv16i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 16 x i64> [[TMP9]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -352,8 +348,6 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -362,7 +356,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP7]], align 2
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i64>
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 8 x i64> [[TMP9]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -450,8 +444,6 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -460,7 +452,7 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -496,12 +488,6 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-INTERLEAVE1-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 1025)
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
@@ -513,7 +499,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32>
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[VEC_PHI]]
; CHECK-INTERLEAVE1-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i32> [[VEC_PHI]]
-; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x i1> [[TMP11]], i32 0
@@ -530,12 +516,6 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-INTERLEAVED-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 1025)
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
@@ -547,7 +527,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[VEC_PHI]]
; CHECK-INTERLEAVED-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i32> [[VEC_PHI]]
-; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x i1> [[TMP11]], i32 0
@@ -564,12 +544,6 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-MAXBW-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025)
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
@@ -581,7 +555,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i32> [[TMP8]], <vscale x 16 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <vscale x 16 x i1> [[TMP10]], i32 0
@@ -752,8 +726,6 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -762,7 +734,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7]], align 1
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
; CHECK-MAXBW-NEXT: [[TMP10]] = sub <vscale x 8 x i32> [[VEC_PHI]], [[TMP9]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -850,8 +822,6 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -860,7 +830,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -970,8 +940,6 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 16
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 16
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i32 [[D]], [[N_VEC]]
; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = insertelement <vscale x 16 x i32> zeroinitializer, i32 [[A]], i32 0
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV1]], i64 0
@@ -984,7 +952,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]]
; CHECK-MAXBW-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9]], align 1
; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]]
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-MAXBW: middle.block:
@@ -1093,8 +1061,6 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 16
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 16
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -1108,7 +1074,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]]
; CHECK-MAXBW-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP10]], align 1
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
-; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-MAXBW: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
index b4df63d..5b0696e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
@@ -12,13 +12,13 @@ define void @cost_hoisted_vector_code(ptr %p, float %arg) {
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> zeroinitializer)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP8]], i32 4
-; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP8]], align 4
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 4
+; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP1]], align 4
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -8
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 6d5bbde..3b43d52 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -16,8 +16,6 @@ define void @test_invar_gep(ptr %dst) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -36,7 +34,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i64> [[TMP9]], i32 [[TMP17]]
; CHECK-NEXT: store i64 [[TMP18]], ptr [[TMP14:%.*]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
index 830e7da..b732f88 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
@@ -12,9 +12,6 @@ define i32 @pr70988(ptr %src, i32 %n) {
; CHECK-NEXT: [[UMAX:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 1
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = icmp ult i64 0, [[UMAX]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[UMAX]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 381d2e1..dcaaa89 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -59,8 +59,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -121,7 +119,7 @@ define i32 @chained_recurrences(i32
%x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: [[TMP46:%.*]] = or <vscale x 4 x i32> [[WIDE_MASKED_GATHER8]], [[VEC_PHI5]] ; VSCALEFORTUNING2-NEXT: [[TMP47]] = or <vscale x 4 x i32> [[TMP45]], [[WIDE_MASKED_GATHER9]] ; VSCALEFORTUNING2-NEXT: [[TMP48]] = or <vscale x 4 x i32> [[TMP46]], [[WIDE_MASKED_GATHER10]] -; VSCALEFORTUNING2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; VSCALEFORTUNING2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VSCALEFORTUNING2-NEXT: br i1 [[TMP49]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VSCALEFORTUNING2: [[MIDDLE_BLOCK]]: @@ -182,12 +180,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 -; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP3]] -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 @@ -218,7 +210,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP27]] ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] @@ -241,7 +233,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP39:%.*]] = or <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[VEC_PHI]] ; PRED-NEXT: [[TMP40:%.*]] = or <vscale x 4 x i32> [[TMP39]], [[WIDE_MASKED_GATHER7]] ; PRED-NEXT: [[TMP41]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP40]], <vscale x 4 x i32> [[VEC_PHI]] -; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP5]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[IV]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP42:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP43:%.*]] = extractelement <vscale x 4 x i1> [[TMP42]], i32 0 @@ -343,8 +335,6 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; DEFAULT-NEXT: 
[[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] @@ -362,7 +352,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP20:%.*]] = udiv <vscale x 4 x i16> [[WIDE_LOAD2]], [[BROADCAST_SPLAT]] ; DEFAULT-NEXT: [[TMP21]] = or <vscale x 4 x i16> [[TMP19]], [[VEC_PHI]] ; DEFAULT-NEXT: [[TMP22]] = or <vscale x 4 x i16> [[TMP20]], [[VEC_PHI1]] -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; DEFAULT-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: @@ -401,8 +391,6 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0 ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer ; VSCALEFORTUNING2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -420,7 +408,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = udiv <vscale x 4 x i16> [[WIDE_LOAD2]], [[BROADCAST_SPLAT]] ; VSCALEFORTUNING2-NEXT: [[TMP15]] = or <vscale x 4 x i16> [[TMP13]], [[VEC_PHI]] ; VSCALEFORTUNING2-NEXT: [[TMP16]] = or <vscale x 4 x i16> [[TMP14]], [[VEC_PHI1]] -; VSCALEFORTUNING2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; VSCALEFORTUNING2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VSCALEFORTUNING2-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VSCALEFORTUNING2: [[MIDDLE_BLOCK]]: @@ -454,12 +442,6 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8 -; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP5]] -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 ; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8 ; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[TMP9]] @@ -478,7 +460,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP20:%.*]] = udiv <vscale x 8 x i16> 
[[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] ; PRED-NEXT: [[TMP21:%.*]] = or <vscale x 8 x i16> [[TMP20]], [[VEC_PHI]] ; PRED-NEXT: [[TMP16]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i16> [[TMP21]], <vscale x 8 x i16> [[VEC_PHI]] -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP12]]) ; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 8 x i1> [[TMP17]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index eb3d724..8d8d427 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -23,15 +23,13 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -44,7 +42,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP16]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP18]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index a7ec749..c78f68f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -14,8 +14,6 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias 
nocapture ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -28,7 +26,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP14]] = fadd fast float [[VEC_PHI]], [[TMP13]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -98,8 +96,6 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -113,7 +109,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]]) ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 0cad053..b0ee9fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -42,37 +42,35 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float 
-0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP7]] = fadd <vscale x 8 x float> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP5]] = fadd <vscale x 8 x float> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[TMP7]]) +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[TMP5]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP10]], [[SUM_07]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP8]], [[SUM_07]] ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-UNORDERED: for.end: -; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-LABEL: define float 
@fadd_strict @@ -87,36 +85,34 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-ORDERED-NEXT: [[TMP7]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-ORDERED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-ORDERED-NEXT: [[TMP5]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP9]], [[SUM_07]] +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP7]], [[SUM_07]] ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: 
[[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict @@ -126,32 +122,26 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]] -; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP10]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP11]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractelement <vscale x 8 x i1> [[TMP13]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], 
[[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP7]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP8]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = extractelement <vscale x 8 x i1> [[TMP10]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: @@ -162,13 +152,13 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP15]], [[SUM_07]] +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP12]], [[SUM_07]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] ; @@ -222,58 +212,56 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; 
CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP11]] -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 24 -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP9]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd <vscale x 8 x float> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd <vscale x 8 x float> [[WIDE_LOAD4]], [[VEC_PHI1]] -; CHECK-UNORDERED-NEXT: [[TMP18]] = fadd <vscale x 8 x float> [[WIDE_LOAD5]], [[VEC_PHI2]] -; CHECK-UNORDERED-NEXT: [[TMP19]] = fadd <vscale x 8 x float> [[WIDE_LOAD6]], [[VEC_PHI3]] -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr 
[[TMP4]], i64 [[TMP12]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP14]] = fadd <vscale x 8 x float> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-UNORDERED-NEXT: [[TMP15]] = fadd <vscale x 8 x float> [[WIDE_LOAD4]], [[VEC_PHI1]] +; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd <vscale x 8 x float> [[WIDE_LOAD5]], [[VEC_PHI2]] +; CHECK-UNORDERED-NEXT: [[TMP17]] = fadd <vscale x 8 x float> [[WIDE_LOAD6]], [[VEC_PHI3]] +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP17]], [[TMP16]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd <vscale x 8 x float> [[TMP18]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd <vscale x 8 x float> [[TMP19]], [[BIN_RDX7]] -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX8]]) +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP15]], [[TMP14]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd <vscale x 8 x float> [[TMP16]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd <vscale x 8 x float> [[TMP17]], [[BIN_RDX7]] +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX8]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP22]], [[SUM_07]] +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-UNORDERED-NEXT: [[ADD]] = fadd float [[TMP20]], [[SUM_07]] ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-UNORDERED: for.end: -; CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; 
CHECK-UNORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-LABEL: define float @fadd_strict_unroll @@ -288,51 +276,49 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP11]] -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 24 -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP9]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP16]], <vscale x 8 x float> [[WIDE_LOAD1]]) -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP17]], <vscale x 8 x float> [[WIDE_LOAD2]]) -; CHECK-ORDERED-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP18]], <vscale x 8 x float> [[WIDE_LOAD3]]) -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-ORDERED-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr 
[[TMP4]], i64 [[TMP9]] +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[WIDE_LOAD]]) +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP14]], <vscale x 8 x float> [[WIDE_LOAD1]]) +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP15]], <vscale x 8 x float> [[WIDE_LOAD2]]) +; CHECK-ORDERED-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP16]], <vscale x 8 x float> [[WIDE_LOAD3]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP21]], [[SUM_07]] +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[ADD]] = fadd float [[TMP19]], [[SUM_07]] ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[ADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fadd_strict_unroll @@ -342,26 +328,20 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32 -; 
CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]] -; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 16 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 24 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) @@ -373,46 +353,46 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ 
[[TMP33:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]]
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP16]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP19]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP25]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP26]])
-; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP27]], <vscale x 8 x float> [[TMP28]])
-; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP30]])
-; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP33]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], <vscale x 8 x float> [[TMP32]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24
+; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP13]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP16]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP19]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP23]])
+; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP24]], <vscale x 8 x float> [[TMP25]])
+; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP26]], <vscale x 8 x float> [[TMP27]])
+; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP30]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], <vscale x 8 x float> [[TMP29]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP31]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], 16
 ; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = add i64 [[INDEX]], [[TMP35]]
 ; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 16
+; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 24
 ; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = add i64 [[INDEX]], [[TMP38]]
-; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = mul nuw i64 [[TMP40]], 24
-; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = add i64 [[INDEX]], [[TMP41]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP36]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP39]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP42]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = extractelement <vscale x 8 x i1> [[TMP43]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP33]], i64 [[TMP6]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP36]], i64 [[TMP6]])
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP39]], i64 [[TMP6]])
+; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = extractelement <vscale x 8 x i1> [[TMP40]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: scalar.ph:
@@ -423,13 +403,13 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP45]], [[SUM_07]]
+; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP42]], [[SUM_07]]
 ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]]
 ;
@@ -500,54 +480,52 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
 ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 2
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A2]], i32 0
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A1]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A2]], i32 0
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A1]], i32 0
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x float> [ [[TMP11]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
 ; CHECK-UNORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
-; CHECK-UNORDERED-NEXT: [[TMP15]] = fadd <vscale x 4 x float> [[TMP13]], [[VEC_PHI1]]
-; CHECK-UNORDERED-NEXT: [[TMP16]] = fadd <vscale x 4 x float> [[TMP14]], [[VEC_PHI]]
-; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
+; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd <vscale x 4 x float> [[TMP11]], [[VEC_PHI1]]
+; CHECK-UNORDERED-NEXT: [[TMP14]] = fadd <vscale x 4 x float> [[TMP12]], [[VEC_PHI]]
+; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP16]])
-; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP15]])
+; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP14]])
+; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP13]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
-; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
-; CHECK-UNORDERED-NEXT: [[ADD1]] = fadd float [[TMP20]], [[ADD_PHI2]]
+; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
+; CHECK-UNORDERED-NEXT: [[ADD1]] = fadd float [[TMP18]], [[ADD_PHI2]]
 ; CHECK-UNORDERED-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
-; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
-; CHECK-UNORDERED-NEXT: [[ADD2]] = fadd float [[TMP21]], [[ADD_PHI1]]
+; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
+; CHECK-UNORDERED-NEXT: [[ADD2]] = fadd float [[TMP19]], [[ADD_PHI1]]
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
-; CHECK-UNORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
 ; CHECK-UNORDERED-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
 ; CHECK-UNORDERED-NEXT: ret void
@@ -570,50 +548,48 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
 ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
-; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
 ; CHECK-ORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
-; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP12]])
-; CHECK-ORDERED-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP11]])
-; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
+; CHECK-ORDERED-NEXT: [[TMP11]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP10]])
+; CHECK-ORDERED-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP9]])
+; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
-; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY:%.*]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
-; CHECK-ORDERED-NEXT: [[ADD1]] = fadd float [[TMP16]], [[ADD_PHI2]]
+; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
+; CHECK-ORDERED-NEXT: [[ADD1]] = fadd float [[TMP14]], [[ADD_PHI2]]
 ; CHECK-ORDERED-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
-; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
-; CHECK-ORDERED-NEXT: [[ADD2]] = fadd float [[TMP17]], [[ADD_PHI1]]
+; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
+; CHECK-ORDERED-NEXT: [[ADD2]] = fadd float [[TMP15]], [[ADD_PHI1]]
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
-; CHECK-ORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
 ; CHECK-ORDERED-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
 ; CHECK-ORDERED-NEXT: ret void
@@ -631,40 +607,34 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-ORDERED-TF: vector.ph:
 ; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]]
-; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]]
-; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]]
-; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
+; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP6]]
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP2]], [[TMP6]]
+; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]])
 ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED-TF: vector.body:
 ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ [[A2]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI1:%.*]] = phi float [ [[A1]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
 ; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP13]], i32 4, <vscale x 8 x i1> [[INTERLEAVED_MASK]], <vscale x 8 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP10]], i32 4, <vscale x 8 x i1> [[INTERLEAVED_MASK]], <vscale x 8 x float> poison)
 ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_MASKED_VEC]])
-; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP15]], <vscale x 4 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP16]])
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP18]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]])
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[TMP20]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
+; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP12]], <vscale x 4 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]])
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP16]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP15]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
+; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: scalar.ph:
@@ -677,18 +647,18 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
 ; CHECK-ORDERED-TF-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[A1]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
-; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP22]], [[ADD_PHI2]]
+; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4
+; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP19]], [[ADD_PHI2]]
 ; CHECK-ORDERED-TF-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]]
-; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
-; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP23]], [[ADD_PHI1]]
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP20]], [[ADD_PHI1]]
 ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
-; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4
 ; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4
 ; CHECK-ORDERED-TF-NEXT: ret void
@@ -770,43 +740,41 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = fadd <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-UNORDERED-NEXT: [[TMP10]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[TMP9]]
-; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = fadd <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-UNORDERED-NEXT: [[TMP8]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[TMP7]]
+; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP10]])
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP8]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-UNORDERED-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
-; CHECK-UNORDERED-NEXT: [[ADD:%.*]] = fadd float [[TMP13]], [[TMP14]]
+; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-UNORDERED-NEXT: [[ADD:%.*]] = fadd float [[TMP11]], [[TMP12]]
 ; CHECK-UNORDERED-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-UNORDERED: for.end.loopexit:
-; CHECK-UNORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_END]]
 ; CHECK-UNORDERED: for.end:
 ; CHECK-UNORDERED-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
@@ -829,42 +797,40 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
 ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
-; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = fadd <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-ORDERED-NEXT: [[TMP10]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP9]])
-; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = fadd <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-ORDERED-NEXT: [[TMP8]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP7]])
+; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
 ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-ORDERED-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
-; CHECK-ORDERED-NEXT: [[ADD:%.*]] = fadd float [[TMP12]], [[TMP13]]
+; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-ORDERED-NEXT: [[ADD:%.*]] = fadd float [[TMP10]], [[TMP11]]
 ; CHECK-ORDERED-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-ORDERED: for.end.loopexit:
-; CHECK-ORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_END]]
 ; CHECK-ORDERED: for.end:
 ; CHECK-ORDERED-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
@@ -882,35 +848,29 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-ORDERED-TF: vector.ph:
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1
-; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
-; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[TMP7]]
-; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], [[TMP7]]
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[N]], [[TMP4]]
+; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP5]], i64 0
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED-TF: vector.body:
 ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = fadd <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP13]], <vscale x 4 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP14]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]])
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = fadd <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
+; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP10]], <vscale x 4 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP11]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP7]])
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
 ; CHECK-ORDERED-TF: scalar.ph:
@@ -921,16 +881,16 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
 ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ]
 ; CHECK-ORDERED-TF-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[SCALAR_PH]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
-; CHECK-ORDERED-TF-NEXT: [[ADD:%.*]] = fadd float [[TMP18]], [[TMP19]]
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-ORDERED-TF-NEXT: [[ADD:%.*]] = fadd float [[TMP15]], [[TMP16]]
 ; CHECK-ORDERED-TF-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]]
 ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-ORDERED-TF: for.end.loopexit:
-; CHECK-ORDERED-TF-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END]]
 ; CHECK-ORDERED-TF: for.end:
 ; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ]
@@ -1002,49 +962,47 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> poison)
-; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00)
-; CHECK-UNORDERED-NEXT: [[TMP9]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[PREDPHI]]
-; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x float> poison)
+; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00)
+; CHECK-UNORDERED-NEXT: [[TMP7]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[PREDPHI]]
+; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP9]])
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP7]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-UNORDERED-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-UNORDERED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP12]], 0.000000e+00
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP10]], 0.000000e+00
 ; CHECK-UNORDERED-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK-UNORDERED: if.then:
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-UNORDERED-NEXT: br label [[FOR_INC]]
 ; CHECK-UNORDERED: for.inc:
-; CHECK-UNORDERED-NEXT: [[PHI:%.*]] = phi float [ [[TMP13]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[PHI:%.*]] = phi float [ [[TMP11]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-UNORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; CHECK-UNORDERED-NEXT: ret float [[RDX]]
 ;
 ; CHECK-ORDERED-LABEL: define float @fadd_conditional
@@ -1059,48 +1017,46 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
-; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00)
-; CHECK-ORDERED-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[PREDPHI]])
-; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-ORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
+; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x float> poison)
+; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00)
+; CHECK-ORDERED-NEXT: [[TMP7]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[PREDPHI]])
+; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-ORDERED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-ORDERED: middle.block:
 ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-ORDERED: scalar.ph:
 ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ]
 ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-ORDERED: for.body:
 ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-ORDERED-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-ORDERED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP11]], 0.000000e+00
+; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP9]], 0.000000e+00
 ; CHECK-ORDERED-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK-ORDERED: if.then:
 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-ORDERED-NEXT: br label [[FOR_INC]]
 ; CHECK-ORDERED: for.inc:
-; CHECK-ORDERED-NEXT: [[PHI:%.*]] = phi float [ [[TMP12]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
+; CHECK-ORDERED-NEXT: [[PHI:%.*]] = phi float [ [[TMP10]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
 ; CHECK-ORDERED-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
 ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-ORDERED: for.end:
-; CHECK-ORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-NEXT: ret float [[RDX]]
 ;
 ; CHECK-ORDERED-TF-LABEL: define float @fadd_conditional
@@ -1110,37 +1066,31 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-ORDERED-TF: vector.ph:
 ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
-; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
-; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
-; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]]
-; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
+; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
+; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
 ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-ORDERED-TF: vector.body:
 ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_MASKED_LOAD]], zeroinitializer
-; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> zeroinitializer
-; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> splat (float 3.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> splat (float -0.000000e+00)
-; CHECK-ORDERED-TF-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP14]])
-; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0
-; CHECK-ORDERED-TF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_MASKED_LOAD]], zeroinitializer
+; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> zeroinitializer
+; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
+; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
+; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> splat (float 3.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> splat (float -0.000000e+00)
+; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP11]])
+; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
+; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]])
+; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
+; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0
+; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-ORDERED-TF: middle.block:
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]]
 ; CHECK-ORDERED-TF: scalar.ph:
@@ -1151,21 +1101,21 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 1.000000e+00, [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ]
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP18]], 0.000000e+00
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP15]], 0.000000e+00
 ; CHECK-ORDERED-TF-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK-ORDERED-TF: if.then:
 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_INC]]
 ; CHECK-ORDERED-TF: for.inc:
-; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP19]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
+; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP16]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ]
 ; CHECK-ORDERED-TF-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]]
 ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-ORDERED-TF: for.end:
-; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
 ; CHECK-ORDERED-TF-NEXT: ret float [[RDX]]
 ;
@@ -1234,43 +1184,41 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b,
 ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK-UNORDERED: vector.body:
 ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = fadd <vscale x 8 x float> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP8]], align 4
-; CHECK-UNORDERED-NEXT: [[TMP9]] = fadd <vscale x 8 x float> [[TMP7]], [[WIDE_LOAD1]]
-; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = fadd <vscale x 8 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4
+; CHECK-UNORDERED-NEXT: [[TMP7]] = fadd <vscale x 8 x float> [[TMP5]], [[WIDE_LOAD1]]
+; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-UNORDERED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-UNORDERED: middle.block:
-; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[TMP9]])
+; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[TMP7]])
 ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK-UNORDERED: scalar.ph:
 ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ -0.000000e+00, [[ENTRY]] ]
+; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ -0.000000e+00, [[ENTRY]] ]
 ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK-UNORDERED: for.body:
 ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-UNORDERED-NEXT: [[ADD:%.*]] = fadd float [[SUM]], [[TMP12]]
+; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-UNORDERED-NEXT: [[ADD:%.*]] = fadd float [[SUM]], [[TMP10]]
 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
-; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-UNORDERED-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP13]]
+; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-UNORDERED-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP11]]
 ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-UNORDERED: for.end:
-; CHECK-UNORDERED-NEXT: [[RDX:%.*]] =
phi float [ [[ADD3]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-UNORDERED-NEXT: [[RDX:%.*]] = phi float [ [[ADD3]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[RDX]] ; ; CHECK-ORDERED-LABEL: define float @fadd_multiple @@ -1371,74 +1319,72 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP11]] -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 24 -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP9]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24 -; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]] -; 
CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP26]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]]) -; CHECK-UNORDERED-NEXT: [[TMP27]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]]) -; CHECK-UNORDERED-NEXT: [[TMP28]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]]) -; CHECK-UNORDERED-NEXT: [[TMP29]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]]) -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = 
getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]] +; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24 +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP24]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]]) +; CHECK-UNORDERED-NEXT: [[TMP25]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]]) +; CHECK-UNORDERED-NEXT: [[TMP26]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]]) +; CHECK-UNORDERED-NEXT: [[TMP27]] = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]]) +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP27]], [[TMP26]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP28]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP29]], [[BIN_RDX11]] -; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]]) +; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd <vscale x 8 x float> [[TMP25]], [[TMP24]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd <vscale x 8 x float> [[TMP26]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd <vscale x 8 x float> [[TMP27]], [[BIN_RDX11]] +; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ 
[[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP32]], float [[TMP33]], float [[SUM_07]]) +; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP30]], float [[TMP31]], float [[SUM_07]]) ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-UNORDERED: for.end: -; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-LABEL: define float @fmuladd_strict @@ -1453,71 +1399,69 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP11]] -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 24 -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP9]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call i64 
@llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]] -; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24 -; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4 -; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] -; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] -; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] -; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP26]]) -; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], <vscale x 8 x float> [[TMP27]]) -; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], <vscale x 8 x float> [[TMP28]]) -; CHECK-ORDERED-NEXT: [[TMP33]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], <vscale x 8 x float> [[TMP29]]) -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-ORDERED-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]] +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; 
CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 +; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]] +; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24 +; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4 +; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] +; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] +; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] +; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP24]]) +; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], <vscale x 8 x float> [[TMP25]]) +; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP26]]) +; CHECK-ORDERED-NEXT: [[TMP31]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], <vscale x 8 x float> [[TMP27]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-NEXT: [[MULADD]] = 
tail call float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]]) +; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP33]], float [[TMP34]], float [[SUM_07]]) ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict @@ -1527,26 +1471,20 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]] -; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 16 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 24 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] ; 
CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) @@ -1558,64 +1496,64 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]] ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]] ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]] -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP16]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP19]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP25]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: 
[[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP21]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP13]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP16]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP19]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP25]] ; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP28]] +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP28]] ; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP34]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP36]], 
<vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP40]]) -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP37]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP41]], <vscale x 8 x float> [[TMP42]]) -; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP38]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP43]], <vscale x 8 x float> [[TMP44]]) -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP47]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP45]], <vscale x 8 x float> [[TMP46]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP31]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] +; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP33]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP37]]) +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP34]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP38]], <vscale x 8 x float> [[TMP39]]) +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP35]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], <vscale x 8 x float> [[TMP41]]) +; CHECK-ORDERED-TF-NEXT: 
[[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP36]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], <vscale x 8 x float> [[TMP43]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]] ; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 16 ; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], [[TMP49]] ; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 24 ; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]] -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul nuw i64 [[TMP54]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = extractelement <vscale x 8 x i1> [[TMP57]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP47]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = extractelement <vscale x 8 x i1> [[TMP54]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: @@ -1626,15 +1564,15 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; 
CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP59]], float [[TMP60]], float [[SUM_07]]) +; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP56]], float [[TMP57]], float [[SUM_07]]) ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP47]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] ; @@ -1693,74 +1631,72 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP11]] -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 24 -; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP14]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-UNORDERED-NEXT: 
[[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP9]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] -; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]] -; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24 -; CHECK-UNORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]] -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4 -; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4 -; CHECK-UNORDERED-NEXT: [[TMP26]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]]) -; CHECK-UNORDERED-NEXT: [[TMP27]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]]) -; CHECK-UNORDERED-NEXT: [[TMP28]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]]) -; CHECK-UNORDERED-NEXT: [[TMP29]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]]) -; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-UNORDERED-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] 
= call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]] +; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 +; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]] +; CHECK-UNORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24 +; CHECK-UNORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]] +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP24]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD]], <vscale x 8 x float> [[WIDE_LOAD7]], <vscale x 8 x float> [[VEC_PHI]]) +; CHECK-UNORDERED-NEXT: [[TMP25]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD4]], <vscale x 8 x float> [[WIDE_LOAD8]], <vscale x 8 x float> [[VEC_PHI1]]) +; CHECK-UNORDERED-NEXT: [[TMP26]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD5]], <vscale x 8 x float> [[WIDE_LOAD9]], <vscale x 8 x float> [[VEC_PHI2]]) +; CHECK-UNORDERED-NEXT: [[TMP27]] = call nnan <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> [[WIDE_LOAD6]], <vscale x 8 x float> [[WIDE_LOAD10]], <vscale x 8 x float> [[VEC_PHI3]]) +; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-UNORDERED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-UNORDERED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP27]], [[TMP26]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP28]], [[BIN_RDX]] -; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP29]], [[BIN_RDX11]] -; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]]) +; 
CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan <vscale x 8 x float> [[TMP25]], [[TMP24]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan <vscale x 8 x float> [[TMP26]], [[BIN_RDX]] +; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan <vscale x 8 x float> [[TMP27]], [[BIN_RDX11]] +; CHECK-UNORDERED-NEXT: [[TMP29:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, <vscale x 8 x float> [[BIN_RDX12]]) ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP32:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-UNORDERED-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-UNORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-UNORDERED-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP32]], float [[TMP33]], float [[SUM_07]]) +; CHECK-UNORDERED-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-UNORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP30]], float [[TMP31]], float [[SUM_07]]) ; CHECK-UNORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-UNORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-UNORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-UNORDERED: for.end: -; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-UNORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] ; CHECK-UNORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-LABEL: define float @fmuladd_strict_fmf @@ -1775,71 +1711,69 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 
@llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16 -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP11]] -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 24 -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP14]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP6]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP9]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP12]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 -; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] -; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]] -; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24 -; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]] -; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP16]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP19]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP22]], align 4 -; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP25]], align 4 -; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] -; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] -; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] -; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP26]]) -; CHECK-ORDERED-NEXT: [[TMP31:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], <vscale x 8 x float> [[TMP27]]) -; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], <vscale x 8 x float> [[TMP28]]) -; CHECK-ORDERED-NEXT: [[TMP33]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP32]], <vscale x 8 x float> [[TMP29]]) -; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-ORDERED-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 +; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP9]] +; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[TMP12]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, ptr [[TMP4]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, ptr [[TMP7]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x float>, ptr [[TMP13]], align 4 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +; CHECK-ORDERED-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP16]] +; CHECK-ORDERED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 +; CHECK-ORDERED-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP19]] +; CHECK-ORDERED-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 24 +; CHECK-ORDERED-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP22]] +; CHECK-ORDERED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x float>, ptr [[TMP14]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, ptr [[TMP17]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, ptr [[TMP20]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, ptr [[TMP23]], align 4 +; CHECK-ORDERED-NEXT: [[TMP24:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] +; CHECK-ORDERED-NEXT: [[TMP25:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] +; CHECK-ORDERED-NEXT: [[TMP26:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; CHECK-ORDERED-NEXT: [[TMP27:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] +; CHECK-ORDERED-NEXT: [[TMP28:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP24]]) +; CHECK-ORDERED-NEXT: [[TMP29:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], <vscale x 8 x float> [[TMP25]]) +; CHECK-ORDERED-NEXT: [[TMP30:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP26]]) +; CHECK-ORDERED-NEXT: [[TMP31]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP30]], <vscale x 8 x float> [[TMP27]]) +; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-ORDERED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-ORDERED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED: middle.block: ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = 
icmp eq i64 [[N]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: ; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP33]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-ORDERED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP35]], float [[TMP36]], float [[SUM_07]]) +; CHECK-ORDERED-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP33]], float [[TMP34]], float [[SUM_07]]) ; CHECK-ORDERED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-ORDERED: for.end: -; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-NEXT: ret float [[MULADD_LCSSA]] ; ; CHECK-ORDERED-TF-LABEL: define float @fmuladd_strict_fmf @@ -1849,26 +1783,20 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF: vector.ph: ; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]] -; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 32 -; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 16 -; 
CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] -; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 24 -; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] +; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 +; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], [[TMP3]] +; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0 +; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP8]] +; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP10]] +; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 24 +; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP12]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) @@ -1880,64 +1808,64 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY3]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT17:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP15]] ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]] +; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP18]] ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; 
CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP21]] -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP24]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP16]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP19]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP25]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP21]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP13]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP16]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP19]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP22]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP25]] ; CHECK-ORDERED-TF-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 8 -; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP28]] +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP28]] ; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 16 -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP31]] -; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[TMP34]] -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], 
i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP35]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] -; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] -; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] -; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] -; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP36]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP40]]) -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP37]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP41]], <vscale x 8 x float> [[TMP42]]) -; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP38]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP43]], <vscale x 8 x float> [[TMP44]]) -; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00) -; CHECK-ORDERED-TF-NEXT: [[TMP47]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP45]], <vscale x 8 x float> [[TMP46]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = mul nuw i64 [[TMP30]], 24 +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[TMP31]] +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP23]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP26]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP29]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP32]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) +; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = fmul nnan <vscale x 8 x float> 
[[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD12]] +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] +; CHECK-ORDERED-TF-NEXT: [[TMP35:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] +; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] +; CHECK-ORDERED-TF-NEXT: [[TMP37:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP33]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP38:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP37]]) +; CHECK-ORDERED-TF-NEXT: [[TMP39:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP34]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP38]], <vscale x 8 x float> [[TMP39]]) +; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP35]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], <vscale x 8 x float> [[TMP41]]) +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP36]], <vscale x 8 x float> splat (float -0.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], <vscale x 8 x float> [[TMP43]]) +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]] ; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 16 ; CHECK-ORDERED-TF-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], [[TMP49]] ; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 16 +; CHECK-ORDERED-TF-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 24 ; CHECK-ORDERED-TF-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], [[TMP52]] -; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = mul nuw i64 [[TMP54]], 24 -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], [[TMP55]] -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) -; CHECK-ORDERED-TF-NEXT: [[TMP58:%.*]] = extractelement <vscale x 8 x i1> [[TMP57]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP47]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP50]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) +; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = extractelement <vscale x 8 x i1> [[TMP54]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] ; CHECK-ORDERED-TF: scalar.ph: @@ -1948,15 +1876,15 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP59:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP59]], float [[TMP60]], float [[SUM_07]]) +; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP56]], float [[TMP57]], float [[SUM_07]]) ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP47]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index eaf85694..4a12aef 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -22,8 +22,6 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -34,7 +32,7 @@ define i64 
@same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]]) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] @@ -264,8 +262,6 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP10]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[INDEX1:%.*]] = sub i64 64, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -275,7 +271,7 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = udiv <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 20000) ; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 4 x i32> [[TMP13]], splat (i32 1) -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]] ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index 070f658..cc83819 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -22,8 +22,6 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 64 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 64 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: vector.body: @@ -61,7 +59,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD6]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP59:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP3]] ; CHECK-NEXT: [[TMP34:%.*]] = or <vscale x 16 x i1> [[TMP32]], [[TMP30]] ; CHECK-NEXT: [[TMP37:%.*]] = or <vscale x 16 x i1> [[TMP34]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = or <vscale x 16 x i1> [[TMP37]], [[TMP59]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index d32b898..cc88946 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -23,8 +23,6 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP5]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 32 ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -34,7 +32,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP9]], i64 [[TMP23]] ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9]], align 1 ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP24]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: @@ -52,14 +50,12 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 8 ; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP15]] ; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]] -; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; DEFAULT: vec.epilog.vector.body: ; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]] ; DEFAULT-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP19]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP17]] +; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP15]] ; DEFAULT-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; DEFAULT-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: @@ -85,12 +81,6 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED: vector.ph: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 101, [[TMP4]] -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; PRED-NEXT: [[TMP9:%.*]] = sub i64 101, [[TMP8]] @@ -103,7 +93,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr [[TMP13]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] 
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; PRED-NEXT: [[TMP15:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP16:%.*]] = extractelement <vscale x 16 x i1> [[TMP15]], i32 0 @@ -227,12 +217,6 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED: vector.ph: ; PRED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2 -; PRED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 1000, [[TMP2]] -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1000) @@ -248,7 +232,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: [[TMP9:%.*]] = and <vscale x 2 x i8> [[TMP8]], [[TMP11]] ; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP9]], ptr [[TMP5]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META7:![0-9]+]], !noalias [[META4]] -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1000) ; PRED-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[TMP12]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll index c721493..bdbbfdf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll @@ -82,8 +82,8 @@ define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out ; CHECK: [[ENTRY:.*:]] ; CHECK: [[VECTOR_PH:.*:]] ; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] -; CHECK: [[TMP6:%.*]] = tail call { half, half } @foo(half [[TMP5:%.*]]) #[[ATTR3]] +; CHECK: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] +; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR3]] ; CHECK: [[MIDDLE_BLOCK:.*:]] ; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] @@ -149,10 +149,9 @@ exit: define void @struct_return_scalable(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) #2 { ; CHECK-LABEL: define void @struct_return_scalable( ; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK: [[ENTRY:.*:]] -; CHECK: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK: [[VECTOR_PH:.*:]] ; CHECK: [[TMP2:%.*]] = call i64 
@llvm.vscale.i64() +; CHECK: [[VECTOR_PH1:.*:]] ; CHECK: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK: [[VECTOR_BODY:.*:]] ; CHECK: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index 67f5083..495f9c0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -10,11 +10,10 @@ define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -69,11 +68,10 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -126,11 +124,10 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 168 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index e555785..d89c525 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -19,8 +19,6 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -36,7 +34,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[TMP17]] = and i64 [[VEC_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[WIDE_LOAD3]]) ; CHECK-NEXT: [[TMP19]] = and i64 [[VEC_PHI2]], [[TMP18]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll index af9c39e..5e22532 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -19,8 +19,6 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -34,7 +32,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, ptr [[TMP15]], align 8 ; CHECK-NEXT: [[TMP16]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP17]] = add <vscale x 2 x i64> [[WIDE_LOAD3]], [[VEC_PHI2]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll index fca29cd..52117e3f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -19,8 +19,6 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; 
CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -33,7 +31,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP18]], <vscale x 4 x float> [[WIDE_LOAD2]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 0f407cd..a521bfa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -35,8 +35,6 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -46,7 +44,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]] ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP14]], align 1 ; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP19]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -64,14 +62,12 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 8 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP24]] ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX5]] ; CHECK-NEXT: store <vscale x 8 x i8> splat (i8 1), ptr [[TMP28]], align 1 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP24]] ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: 
br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -95,8 +91,6 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -106,7 +100,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] ; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP12]], align 1 ; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP17]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF8: middle.block: @@ -159,8 +153,6 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -170,7 +162,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]] ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP6]], align 1 ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP9]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -194,8 +186,6 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -205,7 +195,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]] ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP6]], align 1 ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP9]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], [[TMP5]] +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF8: middle.block: @@ -276,8 +266,6 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -287,7 +275,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP12]], align 1 ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -331,8 +319,6 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -342,7 +328,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP12]], align 1 ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF8: middle.block: @@ -406,8 +392,6 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -417,7 +401,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP18]] ; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP14]], align 1 ; CHECK-NEXT: store 
<vscale x 16 x i8> zeroinitializer, ptr [[TMP19]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
@@ -436,15 +420,13 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 8
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]]
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]]
-; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP28]], align 1
-; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX7]], [[TMP26]]
+; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX7]], [[TMP24]]
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: vec.epilog.middle.block:
@@ -469,8 +451,6 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -480,7 +460,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP16]]
; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP12]], align 1
; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP17]], align 1
-; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF8: middle.block:
@@ -545,8 +525,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -569,7 +547,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
@@ -587,8 +565,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 2
; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], [[TMP27]]
; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]]
-; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 2
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -598,7 +574,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD8]], [[WIDE_LOAD9]]
; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP32]], align 4
-; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
+; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP27]]
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC6]]
; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: vec.epilog.middle.block:
@@ -621,8 +597,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -645,7 +619,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
-; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-VF8: middle.block:
@@ -697,8 +671,6 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -721,7 +693,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
@@ -739,8 +711,6 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 2
; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], [[TMP27]]
; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]]
-; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 2
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -750,7 +720,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD8]], [[WIDE_LOAD9]]
; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP32]], align 4
-; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
+; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP27]]
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC6]]
; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: vec.epilog.middle.block:
@@ -773,8 +743,6 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -797,7 +765,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
-; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-VF8: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
index 24f93f0..863dae7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
@@ -31,8 +31,6 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 16
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -50,7 +48,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 8 x half> [[TMP16]], ptr [[TMP18]], align 2
; CHECK-NEXT: store <vscale x 8 x half> [[TMP17]], ptr [[TMP22]], align 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index 663cf41..d336f5f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -10,11 +10,10 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -62,11 +61,10 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -113,11 +111,10 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -166,11 +163,10 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[INV:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -228,14 +224,12 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_VEC]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP7]], i64 [[N_VEC]]
; CHECK-NEXT: [[N_VEC1:%.*]] = sub i64 [[N]], [[TMP6]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]]
-; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP3]], 5
+; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP2]], 5
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]]
; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]]
@@ -251,7 +245,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP12]], i64 [[DOTIDX]]
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC1]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index fefb5af..351da8b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -12,13 +12,11 @@ define void @induction_i7(ptr %dst) #0 {
; CHECK-LABEL: define void @induction_i7(
; CHECK-SAME: ptr [[DST:%.*]])
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -76,13 +74,11 @@ define void @induction_i3_zext(ptr %dst) #0 {
; CHECK-LABEL: define void @induction_i3_zext(
; CHECK-SAME: ptr [[DST:%.*]])
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
index 4f0637f..95836f8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll
@@ -23,10 +23,8 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -37,7 +35,7 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 47ce05d..fd0bc0b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -437,8 +437,6 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 512, [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -452,7 +450,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP7]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP6]], ptr [[TMP8]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
@@ -508,8 +506,6 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -523,7 +519,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[INDEX]], 9223372036854775804
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP15]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
@@ -803,12 +799,10 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -822,7 +816,7 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
@@ -875,10 +869,8 @@ define i32 @PR27626_1(ptr %p, i64 %n) #1 {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -896,7 +888,7 @@ define i32 @PR27626_1(ptr %p, i64 %n) #1 {
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]])
; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
; CHECK-NEXT: [[TMP17]] = add <vscale x 4 x i32> [[TMP16]], [[VEC_PHI]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
@@ -952,12 +944,10 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -971,7 +961,7 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
@@ -1025,10 +1015,8 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -1047,7 +1035,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]])
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
; CHECK-NEXT: [[TMP18]] = add <vscale x 4 x i32> [[TMP17]], [[VEC_PHI]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
@@ -1103,11 +1091,10 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i64 [[TMP2]], [[TMP4]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]]
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP7]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNOT]]
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -1180,11 +1167,10 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i64 [[TMP2]], [[TMP4]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP8]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNOT]]
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
@@ -1273,11 +1259,10 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP8]], -4
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNEG]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
+; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP10]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]]
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
@@ -1372,10 +1357,8 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -1402,7 +1385,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
; CHECK-NEXT: [[TMP17:%.*]] = shl <vscale x 4 x i32> [[TMP9]], [[TMP13]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, <vscale x 4 x ptr> [[TMP10]], i64 8
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x ptr> [[TMP25]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
@@ -1466,8 +1449,6 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1492,7 +1473,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
; CHECK-NEXT: [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP12]], [[TMP19]]
; CHECK-NEXT: [[INTERLEAVED_VEC13:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv16i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP23]], <vscale x 4 x i32> [[TMP24]])
; CHECK-NEXT: store <vscale x 16 x i32> [[INTERLEAVED_VEC13]], ptr [[TMP21]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index f152dd3..40ad5bb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -30,41 +30,39 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP0]], 64
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALAR_TAIL_FOLDING: vector.ph:
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
-; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP2]]
; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP2]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[INDEX]], 1
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]]
-; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP10]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison)
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 1
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP6]]
+; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]])
+; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP7]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison)
; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[WIDE_MASKED_VEC]])
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[TMP12]])
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = sext i32 [[TMP8]] to i64
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP14]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP13]]
-; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP16]])
-; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]])
-; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]])
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = sext i32 [[TMP5]] to i64
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP11]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP10]]
+; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP13]])
+; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]])
+; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP12]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]])
+; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; SCALAR_TAIL_FOLDING: middle.block:
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -80,42 +78,42 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMPA:%.*]] = sub i32 1024, [[TMP3]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMPB:%.*]] = icmp ult i32 [[TMP2]], 64
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = select i1 [[TMPB]], i32 [[TMPA]], i32 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = sub i32 1024, [[TMP3]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 64
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 0
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[INDEX]], 1
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP10]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison)
+; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP8]], <vscale x 16 x i1> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = shl i32 [[INDEX]], 1
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP12]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison)
; PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[WIDE_MASKED_VEC]])
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[TMP12]])
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = sext i32 [[TMP8]] to i64
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP14]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP13]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP16]])
-; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]])
-; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sext i32 [[TMP10]] to i64
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP15]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP18]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]])
+; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP17]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]])
; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP19]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; PREDICATED_TAIL_FOLDING: middle.block:
; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]]
; PREDICATED_TAIL_FOLDING: scalar.ph:
@@ -176,34 +174,32 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP0]], 64
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALAR_TAIL_FOLDING: vector.ph:
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
-; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP2]]
; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP2]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP8]]
-; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP9]], i32 1, <vscale x 16 x i1> splat (i1 true))
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 1)
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg <vscale x 16 x i32> [[TMP11]] to <vscale x 16 x i64>
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP12]]
-; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP13]], i32 1, <vscale x 16 x i1> [[TMP10]])
-; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = zext nneg <vscale x 16 x i32> [[TMP4]] to <vscale x 16 x i64>
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP5]]
+; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP6]], i32 1, <vscale x 16 x i1> splat (i1 true))
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = or disjoint <vscale x 16 x i32> [[TMP4]], splat (i32 1)
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64>
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP9]]
+; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP7]])
+; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALAR_TAIL_FOLDING: middle.block:
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -219,35 +215,35 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMPA:%.*]] = sub i32 1024, [[TMP3]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMPB:%.*]] = icmp ult i32 [[TMP2]], 64
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = select i1 [[TMPB]], i32 [[TMPA]], i32 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = sub i32 1024, [[TMP3]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 64
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 0
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64>
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP7]]
-; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP8]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint <vscale x 16 x i32> [[TMP6]], splat (i32 1)
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg <vscale x 16 x i32> [[TMP11]] to <vscale x 16 x i64>
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP12]]
-; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP13]], i32 1, <vscale x 16 x i1> [[TMP10]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64>
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP9]]
+; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP11]], <vscale x 16 x i1> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint <vscale x 16 x i32> [[TMP8]], splat (i32 1)
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg <vscale x 16 x i32> [[TMP13]] to <vscale x 16 x i64>
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP14]]
+; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP15]], i32 1, <vscale x 16 x i1> [[TMP12]])
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]])
+; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]])
; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
; PREDICATED_TAIL_FOLDING: middle.block:
; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]]
; PREDICATED_TAIL_FOLDING: scalar.ph:
@@ -304,37 +300,35 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP0]], 64
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SCALAR_TAIL_FOLDING: vector.ph:
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
-; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP2]]
; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP2]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP9]]
-; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP8]])
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 1)
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg <vscale x 16 x i32> [[TMP12]] to <vscale x 16 x i64>
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP13]]
-; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP14]], i32 1, <vscale x 16 x i1> [[TMP11]])
-; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = zext nneg <vscale x 16 x i32> [[TMP4]] to <vscale x 16 x i64>
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP6]]
+; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP7]], i32 1, <vscale x 16 x i1> [[TMP5]])
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = or disjoint <vscale x 16 x i32> [[TMP4]], splat (i32 1)
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = zext nneg <vscale x 16 x i32> [[TMP9]] to <vscale x 16 x i64>
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP10]]
+; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP11]], i32 1, <vscale x 16 x i1> [[TMP8]])
+; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]]
; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT4]]
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]],
!llvm.loop [[LOOP6:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -351,39 +345,39 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMPA:%.*]] = sub i32 1024, [[TMP3]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMPB:%.*]] = icmp ult i32 [[TMP2]], 64 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = select i1 [[TMPB]], i32 [[TMPA]], i32 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = sub i32 1024, [[TMP3]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 64 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64> -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP9]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), 
<vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP8]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP11]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint <vscale x 16 x i32> [[TMP6]], splat (i32 1) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg <vscale x 16 x i32> [[TMP13]] to <vscale x 16 x i64> -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP14]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP15]], i32 1, <vscale x 16 x i1> [[TMP12]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64> +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP11]] +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP12]], i32 1, <vscale x 16 x i1> [[TMP10]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP13]], <vscale x 16 x i1> zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = or disjoint <vscale x 16 x i32> [[TMP8]], splat (i32 1) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg <vscale x 16 x i32> [[TMP15]] to <vscale x 16 x i64> +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP16]] +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP17]], i32 1, <vscale x 16 x i1> [[TMP14]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT4]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop 
[[LOOP6:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_TAIL_FOLDING: scalar.ph: @@ -452,45 +446,43 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i32 [[TMP0]], 64 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALAR_TAIL_FOLDING: vector.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4 +; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP2]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() -; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP2]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[INDEX]], 2 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]] -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 2 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 +; 
SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP6]] +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; SCALAR_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP7]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) ; SCALAR_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> [[WIDE_MASKED_VEC]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[TMP12]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP15]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP17]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = sext i32 [[TMP8]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP19]] -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]], <vscale x 16 x i8> [[TMP17]], <vscale x 16 x i8> [[TMP18]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = sub 
<vscale x 16 x i8> zeroinitializer, [[TMP12]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP14]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sext i32 [[TMP5]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP17]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -506,46 +498,46 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMPA:%.*]] = sub i32 1024, [[TMP3]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMPB:%.*]] = icmp ult i32 [[TMP2]], 64 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = select i1 [[TMPB]], i32 [[TMPA]], i32 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = sub i32 1024, [[TMP3]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 64 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> 
[[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = shl i32 [[INDEX]], 2 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]] -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP10]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP8]], <vscale x 16 x i1> zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = shl i32 [[INDEX]], 2 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP12]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) ; PREDICATED_TAIL_FOLDING-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> [[WIDE_MASKED_VEC]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 -; 
PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[TMP12]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP15]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP17]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = sext i32 [[TMP8]] to i64 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP19]] -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]], <vscale x 16 x i8> [[TMP17]], <vscale x 16 x i8> [[TMP18]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP19]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = sext i32 [[TMP10]] to i64 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP21]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP17]], <vscale x 16 x i8> [[TMP18]], <vscale x 16 x i8> [[TMP19]], <vscale x 16 x i8> [[TMP20]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> [[TMP9]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP22]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] -; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; 
PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_TAIL_FOLDING: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index f0675a4..c8bbbdc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -15,8 +15,6 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -27,7 +25,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_LOAD]], i32 [[TMP11]] ; CHECK-NEXT: store i16 [[TMP12]], ptr [[DST:%.*]], align 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -64,8 +62,6 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -75,7 +71,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP9]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: 
middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 2b4aad1..76f33cf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -19,8 +19,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 8 @@ -35,7 +33,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]] ; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP30]], align 8 ; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index 2b01018..49f9870 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -9,12 +9,6 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 7, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 7) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 993c048..26a1649 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -33,8 +33,6 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] @@ -50,7 +48,7 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] ; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP25]], align 4 ; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD3]], ptr [[TMP30]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -105,8 +103,6 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -122,7 +118,7 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] ; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP25]], align 4 ; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD3]], ptr [[TMP30]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index 893ebef..b8f44f6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -46,8 +46,6 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP50:%.*]] = mul nuw i64 [[TMP49]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -77,7 +75,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP45]], i64 [[TMP47]] ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP35]], ptr [[TMP45]], align 8 ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP36]], ptr [[TMP48]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP50]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 1cda568..10fe67d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -46,12 +46,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index fb0447b..ed2c5cd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -10,12 +10,6 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -28,7 +22,7 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD1]], [[TMP10]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP13]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index d6f8b8e..6b1b04a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -12,12 +12,6 @@ define i32 
@add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -65,12 +59,6 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-IN-LOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-IN-LOOP: vector.ph: -; CHECK-IN-LOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-IN-LOOP-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-IN-LOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-IN-LOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -136,12 +124,6 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -188,12 +170,6 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-IN-LOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-IN-LOOP: vector.ph: -; CHECK-IN-LOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-IN-LOOP-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-IN-LOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-IN-LOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -257,17 +233,11 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul 
nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) @@ -322,17 +292,11 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: entry: ; CHECK-IN-LOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-IN-LOOP: vector.ph: -; CHECK-IN-LOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-IN-LOOP-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-IN-LOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] -; CHECK-IN-LOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] +; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 4ec7d4d..01b864b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -10,12 +10,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -101,12 +95,6 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; 
CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP84:%.*]] = mul nuw i64 [[TMP83]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 672523e..e996535 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -12,12 +12,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -32,7 +26,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 @@ -63,9 +57,6 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 3 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[UMAX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[UMAX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[TMP0]], i64 0 @@ -111,12 +102,6 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; 
CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -131,7 +116,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 @@ -169,12 +154,6 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] @@ -184,7 +163,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> [[TMP13]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP7]] +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP4]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -196,7 +175,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.*]], <vscale x 4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: 
[[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] @@ -232,12 +211,6 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -254,7 +227,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 @@ -292,15 +265,9 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) @@ -313,7 +280,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 @@ -351,15 +318,9 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) @@ -377,7 +338,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 @@ -423,15 +384,9 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) @@ -444,7 +399,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[TMP11:%.*]] 
= getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 @@ -479,12 +434,6 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -501,7 +450,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison) ; CHECK-NEXT: [[TMP15:%.*]] = fdiv <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP15]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0 @@ -539,12 +488,6 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -562,7 +505,7 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD2]], <vscale x 4 x i32> splat 
(i32 1) ; CHECK-NEXT: [[TMP16:%.*]] = udiv <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[TMP15]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP16]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 @@ -601,8 +544,6 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -610,7 +551,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: store <vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 33fa360..9989209 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -19,13 +19,11 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 3 ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -98,13 +96,11 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: 
vector.ph: -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll index 352f4fe..db941a3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll @@ -15,8 +15,6 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] @@ -70,8 +68,6 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP10]] @@ -141,8 +137,6 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -152,7 +146,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -211,8 +205,6 @@ define void 
@vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -235,7 +227,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]] ; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -294,8 +286,6 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -318,7 +308,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]] ; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -379,8 +369,6 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -403,7 +391,7 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]] ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; 
CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -460,8 +448,6 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -484,7 +470,7 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]] ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index e214e82..b007db9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -16,8 +16,6 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 ; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer @@ -30,7 +28,7 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 5c6328e..1012c10 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -26,8 +26,6 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 
[[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]] @@ -35,19 +33,19 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 1) -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP8]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, <vscale x 2 x ptr> [[VECTOR_GEP]], i64 1 -; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP16]], ptr [[NEXT_GEP]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP18]], align 1 -; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1) -; CHECK-NEXT: store <vscale x 2 x i8> [[TMP20]], ptr [[TMP18]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, <vscale x 2 x ptr> [[VECTOR_GEP]], i64 1 +; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP9]], ptr [[NEXT_GEP]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1) +; CHECK-NEXT: store <vscale x 2 x i8> [[TMP12]], ptr [[TMP10]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP3]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -112,8 +110,6 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -126,8 +122,8 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr 
[[TMP15]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], ptr [[TMP15]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP4]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 2c88e0e..11eef23 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -22,11 +22,10 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP26]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -134,11 +133,10 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP6]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2 @@ -225,11 +223,10 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i64 [[SMAX]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -2 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], [[DOTNEG]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP6]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], [[DOTNOT]] ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll index 5848d31..c54511e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll @@ -22,8 +22,6 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -32,7 +30,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[TMP14:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP14]] ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP15]], i32 1, <vscale x 4 x i1> splat (i1 true)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -49,8 +47,6 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP25]] ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[FOR_BODY]] ] @@ -59,7 +55,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 2 x i32> [[WIDE_LOAD5]] to <vscale x 2 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 2 x i64> [[TMP19]] ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i32(<vscale x 2 x ptr> [[TMP20]], i32 1, <vscale x 2 x i1> splat (i1 true)) -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP17]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP25]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll index abee8b9..baf050c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll @@ -70,10 +70,8 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe ; NORMAL_DEP_LIMIT-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP4]], 2 ; 
NORMAL_DEP_LIMIT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NORMAL_DEP_LIMIT-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NORMAL_DEP_LIMIT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; NORMAL_DEP_LIMIT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 ; NORMAL_DEP_LIMIT-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() -; NORMAL_DEP_LIMIT-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP6]] to i32 +; NORMAL_DEP_LIMIT-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 ; NORMAL_DEP_LIMIT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0 ; NORMAL_DEP_LIMIT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; NORMAL_DEP_LIMIT-NEXT: br label [[FOR_BODY:%.*]] @@ -91,7 +89,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe ; NORMAL_DEP_LIMIT-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4, !alias.scope [[META7:![0-9]+]], !noalias [[META0]] ; NORMAL_DEP_LIMIT-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD10]], [[VEC_IND]] ; NORMAL_DEP_LIMIT-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP14]], align 4, !alias.scope [[META7]], !noalias [[META0]] -; NORMAL_DEP_LIMIT-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP6]] +; NORMAL_DEP_LIMIT-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP8]] ; NORMAL_DEP_LIMIT-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] ; NORMAL_DEP_LIMIT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; NORMAL_DEP_LIMIT-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 9257e45..3b19e9ee 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -36,11 +36,10 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -86,11 +85,10 @@ define void @simple_histogram_inc_param(ptr noalias %buckets, ptr readonly %indi ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -136,11 +134,10 @@ define void @simple_histogram_sub(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -186,11 +183,10 @@ define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices, ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP3]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] @@ -248,11 +244,10 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP9]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -385,11 +380,10 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -446,11 +440,10 @@ define void 
@histogram_array_3op_gep(i64 noundef %N) #0 { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -500,11 +493,10 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -611,11 +603,10 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP6]], -4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP8]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP8]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0 @@ -714,11 +705,10 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -2 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 124abc6..cdd41a0 100644 
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -22,8 +22,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]] ; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] -; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NONE-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0 ; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; NONE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -31,7 +29,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; NONE-NEXT: store <vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4 -; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] +; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]] ; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NONE: middle.block: @@ -61,8 +59,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4 ; DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0 ; DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; DATA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -71,7 +67,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[UMAX]]) ; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] +; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]] ; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA: middle.block: @@ -100,8 +96,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 ; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 
[[UMAX]], 1 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -118,7 +112,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[TMP12]]) -; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]] +; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP5]] ; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] ; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_NO_LANEMASK: middle.block: @@ -143,12 +137,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL: vector.ph: ; DATA_AND_CONTROL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; DATA_AND_CONTROL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; DATA_AND_CONTROL-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] -; DATA_AND_CONTROL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; DATA_AND_CONTROL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4 ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer @@ -158,7 +146,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] +; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 @@ -185,12 +173,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK: vector.ph: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; 
DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -205,7 +187,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]]
+; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index bba9293..4ed9580 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -73,7 +73,6 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
 ; CHECK-NEXT: }
 ; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' {
 ; CHECK-NEXT: Live-in ir<[[EP_VFxUF:.+]]> = VF * UF
-; CHECK-NEXT: Live-in ir<[[EP_VEC_TC:.+]]> = vector-trip-count
 ; CHECK-NEXT: Live-in ir<1024> = original trip-count
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb<entry>:
@@ -83,7 +82,9 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
 ; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb<vector.ph>:
-; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<1024>, ir<16>
+; CHECK-NEXT: EMIT vp<[[VEC_TC:%.+]]> = sub ir<1024>, vp<%n.mod.vf>
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
 ; CHECK-NEXT: Successor(s): vector.body
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
@@ -98,12 +99,12 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
 ; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
 ; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
-; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
+; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, vp<[[VEC_TC]]>
 ; CHECK-NEXT: Successor(s): middle.block, vector.body
 ; CHECK-EMPTY:
 ; CHECK-NEXT: middle.block:
 ; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, ir<1024>
+; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
 ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
 ; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
 ; CHECK-EMPTY:
@@ -112,7 +113,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT-SCALAR vp<[[EP_RESUME:%.+]]> = phi [ ir<1024>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[EP_RESUME:%.+]]> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
 ; CHECK-NEXT: EMIT-SCALAR vp<[[EP_MERGE:%.+]]> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
 ; CHECK-NEXT: Successor(s): ir-bb<for.body>
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
index c1d4317..9edd6ce 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
@@ -13,8 +13,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
 ; WIDE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
 ; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; WIDE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
 ; WIDE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; WIDE: vector.body:
 ; WIDE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -24,7 +22,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
 ; WIDE-NEXT: [[TMP6:%.*]] = call <vscale x 4 x float> @foo_vector(<vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> splat (i1 true))
 ; WIDE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; WIDE-NEXT: store <vscale x 4 x float> [[TMP6]], ptr [[TMP7]], align 4
-; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; WIDE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; WIDE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; WIDE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
index be6e918..f3e0a5a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
@@ -28,12 +28,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; ZVFBFMIN-NEXT: [[ENTRY:.*]]:
 ; ZVFBFMIN-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; ZVFBFMIN: [[VECTOR_PH]]:
-; ZVFBFMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFBFMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
-; ZVFBFMIN-NEXT: [[TMP3:%.*]] = sub i64 [[TMP10]], 1
-; ZVFBFMIN-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP3]]
-; ZVFBFMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; ZVFBFMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; ZVFBFMIN-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFBFMIN-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP12]], 8
 ; ZVFBFMIN-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -142,12 +136,6 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
 ; ZVFBFMIN-NEXT: [[ENTRY:.*]]:
 ; ZVFBFMIN-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; ZVFBFMIN: [[VECTOR_PH]]:
-; ZVFBFMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFBFMIN-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; ZVFBFMIN-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], 1
-; ZVFBFMIN-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]]
-; ZVFBFMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; ZVFBFMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; ZVFBFMIN-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFBFMIN-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; ZVFBFMIN-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 3a77d2e5..606b863 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -13,12 +13,6 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
 ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
@@ -99,12 +93,6 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 333, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP6]], 8
 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
@@ -185,12 +173,6 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 333, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP6]], 8
 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
@@ -281,12 +263,6 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
 ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
@@ -379,12 +355,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 333, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP6]], 8
 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
@@ -485,12 +455,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> poison, i1 [[IC]], i64 0
@@ -604,12 +568,6 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -689,12 +647,6 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -802,12 +754,6 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
 ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP14]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP19]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP3]], [[TMP20]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP19]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP16]], 4
 ; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 18757a7..fcfd02b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -20,12 +20,6 @@ define void @dead_load(ptr %p, i16 %start) {
 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8
-; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP5]], [[TMP10]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
 ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
@@ -111,13 +105,11 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[N_MOD_VF]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 252, [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 4
 ; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[N_VEC]], 4
 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i32> [[TMP9]], splat (i32 4)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = mul i32 4, [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i32 4, [[TMP4]]
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -127,7 +119,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[TMP15:%.*]] = sext <vscale x 4 x i32> [[VEC_IND]] to <vscale x 4 x i64>
 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], <vscale x 4 x i64> [[TMP15]]
 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[TMP16]], i32 1, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -317,12 +309,6 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 37, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
@@ -412,12 +398,6 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
-; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[TMP6]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP15]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
index 7e48926..2a30724 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
@@ -15,12 +15,6 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -78,12 +72,6 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index 5d92081..d86eb91 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -12,12 +12,6 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -108,12 +102,6 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -204,12 +192,6 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -300,12 +282,6 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -396,12 +372,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP9]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -521,12 +491,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP9]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -646,12 +610,6 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -765,12 +723,6 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -884,12 +836,6 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index 2b93668..ea8af94 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -10,12 +10,6 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -76,12 +70,6 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll
index 22a5a0c..1220459 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll
@@ -28,12 +28,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; ZVFHMIN-NEXT: [[ENTRY:.*]]:
 ; ZVFHMIN-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; ZVFHMIN: [[VECTOR_PH]]:
-; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
-; ZVFHMIN-NEXT: [[TMP3:%.*]] = sub i64 [[TMP10]], 1
-; ZVFHMIN-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP3]]
-; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; ZVFHMIN-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP12]], 8
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index ce58ae1..aca00a9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -15,7 +15,6 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
@@ -28,7 +27,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP7]], ptr [[TMP8]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
index 8657e2f..713105f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
@@ -31,8 +31,6 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP19]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -43,7 +41,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD5]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP10]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -94,8 +92,6 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
 ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; ZVFHMIN-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
 ; ZVFHMIN: [[VECTOR_BODY]]:
 ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -106,7 +102,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP17:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD5]])
 ; ZVFHMIN-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; ZVFHMIN-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP18]], align 4
-; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
 ; ZVFHMIN-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVFHMIN-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; ZVFHMIN: [[MIDDLE_BLOCK]]:
@@ -180,8 +176,6 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP19]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -192,7 +186,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 4 x float> @llvm.maximumnum.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD5]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP10]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -243,8 +237,6 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
 ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; ZVFHMIN-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
 ; ZVFHMIN: [[VECTOR_BODY]]:
 ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -255,7 +247,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP17:%.*]] = call <vscale x 4 x float> @llvm.maximumnum.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD5]])
 ; ZVFHMIN-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; ZVFHMIN-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP18]], align 4
-; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
 ; ZVFHMIN-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVFHMIN-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; ZVFHMIN: [[MIDDLE_BLOCK]]:
@@ -329,8 +321,6 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP19]], 2
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -341,7 +331,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD5]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 2 x double> [[TMP17]], ptr [[TMP10]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -392,8 +382,6 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
 ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; ZVFHMIN-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
 ; ZVFHMIN: [[VECTOR_BODY]]:
 ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -404,7 +392,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP17:%.*]] = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD5]])
 ; ZVFHMIN-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; ZVFHMIN-NEXT: store <vscale x 2 x double> [[TMP17]], ptr [[TMP18]], align 8
-; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
 ; ZVFHMIN-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVFHMIN-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; ZVFHMIN: [[MIDDLE_BLOCK]]:
@@ -478,8 +466,6 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP19]], 2
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -490,7 +476,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD5]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 2 x double> [[TMP17]], ptr [[TMP10]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -541,8 +527,6 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
 ; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
 ; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
-; ZVFHMIN-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
 ; ZVFHMIN: [[VECTOR_BODY]]:
 ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -553,7 +537,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[TMP17:%.*]] = call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD5]])
 ; ZVFHMIN-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
 ; ZVFHMIN-NEXT: store <vscale x 2 x double> [[TMP17]], ptr [[TMP18]], align 8
-; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; ZVFHMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
 ; ZVFHMIN-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVFHMIN-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; ZVFHMIN: [[MIDDLE_BLOCK]]:
@@ -619,12 +603,6 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP10]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 4096, [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -683,12 +661,6 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; ZVFHMIN: [[VECTOR_PH]]:
-; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
-; ZVFHMIN-NEXT: [[TMP14:%.*]] = sub i64 [[TMP10]], 1
-; ZVFHMIN-NEXT: [[N_RND_UP:%.*]] = add i64 4096, [[TMP14]]
-; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; ZVFHMIN-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -770,12 +742,6 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP10]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 4096, [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -834,12 +800,6 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; ZVFHMIN: [[VECTOR_PH]]:
-; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
-; ZVFHMIN-NEXT: [[TMP14:%.*]] = sub i64 [[TMP10]], 1
-; ZVFHMIN-NEXT: [[N_RND_UP:%.*]] = add i64 4096, [[TMP14]]
-; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; ZVFHMIN-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
 ; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
index 60ea48e..60f7c24 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
@@ -53,12 +53,6 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT19]]
 ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], 8
-; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[TMP46]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP5]], [[TMP22]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP46]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP52:%.*]] = mul nuw i64 [[TMP51]], 8
 ; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 2f92ab5..1b2ac80 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -23,8 +23,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; OUTLOOP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4
 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
 ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
-; OUTLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4
 ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
 ; OUTLOOP: vector.body:
 ; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -33,7 +31,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP7]], align 2
 ; OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i32>
 ; OUTLOOP-NEXT: [[TMP10]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
-; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
 ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; OUTLOOP: middle.block:
@@ -75,8 +73,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 8
 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
 ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
-; INLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 8
 ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
 ; INLOOP: vector.body:
 ; INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -86,7 +82,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; INLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i32>
 ; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP9]])
 ; INLOOP-NEXT: [[TMP11]] = add i32 [[VEC_PHI]], [[TMP10]]
-; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
 ; INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; INLOOP: middle.block:
@@ -120,12 +116,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; IF-EVL-OUTLOOP: for.body.preheader:
 ; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL-OUTLOOP: vector.ph:
-; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 4
-; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
-; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP2]]
-; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4
 ; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -174,12 +164,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; IF-EVL-INLOOP: for.body.preheader:
 ; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL-INLOOP: vector.ph:
-; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 8
-; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
-; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP2]]
-; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 8
 ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -252,8 +236,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; OUTLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
 ; OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -264,7 +246,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; OUTLOOP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
 ; OUTLOOP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]]
-; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; OUTLOOP: middle.block:
@@ -300,8 +282,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
 ; INLOOP: vector.body:
 ; INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -310,7 +290,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
 ; INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP9]], i32 [[VEC_PHI]])
-; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; INLOOP: middle.block:
@@ -338,12 +318,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: entry:
 ; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL-OUTLOOP: vector.ph:
-; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -352,7 +326,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP: vector.body:
 ; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -389,19 +363,13 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: entry:
 ; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL-INLOOP: vector.ph:
-; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
 ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
 ; IF-EVL-INLOOP: vector.body:
 ; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index ad25a5f..8046394 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -8,12 +8,6 @@ define void @load_store_factor2_i32(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -112,12 +106,6 @@ define void @load_store_factor2_i32(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP12]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -198,12 +186,6 @@ define void @load_store_factor2_i64(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -302,12 +284,6 @@ define void @load_store_factor2_i64(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP12]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -388,12 +364,6 @@ define void @load_store_factor3_i32(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP15]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -508,12 +478,6 @@ define void @load_store_factor3_i32(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[TMP15:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP15]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -607,12 +571,6 @@ define void @load_store_factor3_i64(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP15]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -727,12 +685,6 @@ define void @load_store_factor3_i64(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP15:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP15]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -826,12 +778,6 @@ define void @load_store_factor4(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP18]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -960,12 +906,6 @@ define void @load_store_factor4(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP18:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP18]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1072,11 +1012,6 @@ define void @load_store_factor5(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
@@ -1220,11 +1155,6 @@ define void @load_store_factor5(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = sub i64 [[TMP4]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP3]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
@@ -1343,11 +1273,6 @@ define void @load_store_factor6(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
@@ -1506,11 +1431,6 @@ define void @load_store_factor6(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = sub i64 [[TMP4]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP3]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
@@ -1642,11 +1562,6 @@ define void @load_store_factor7(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP3]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
@@ -1821,11 +1736,6 @@ define void @load_store_factor7(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = sub i64 [[TMP4]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP3]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
@@ -1970,11 +1880,6 @@ define void @load_store_factor8(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP36:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP36]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
@@ -2162,11 +2067,6 @@ define void @load_store_factor8(ptr %p) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP36:%.*]] = sub i64 [[TMP1]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP36]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
@@ -2324,12 +2224,6 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP14]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -2422,12 +2316,6 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP14]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -2506,12 +2394,6 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP14]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -2604,12 +2486,6 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; SCALABLE-NEXT: entry:
 ; SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP14]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
index 976ce77..1bceb87 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -18,37 +18,35 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ;
SCALAR_EPILOGUE-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4 ; SCALAR_EPILOGUE-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP2]] ; SCALAR_EPILOGUE-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] -; SCALAR_EPILOGUE-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_EPILOGUE-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 4 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; SCALAR_EPILOGUE-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() -; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP4]], i64 0 +; SCALAR_EPILOGUE-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP2]], i64 0 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; SCALAR_EPILOGUE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_EPILOGUE: vector.body: ; SCALAR_EPILOGUE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_EPILOGUE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_EPILOGUE-NEXT: [[TMP7:%.*]] = shl i32 [[INDEX]], 1 -; SCALAR_EPILOGUE-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; SCALAR_EPILOGUE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]] -; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP9]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison) +; SCALAR_EPILOGUE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_EPILOGUE-NEXT: [[TMP4:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_EPILOGUE-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 1 +; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 +; SCALAR_EPILOGUE-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP6]] +; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP7]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison) ; SCALAR_EPILOGUE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[WIDE_MASKED_VEC]]) -; SCALAR_EPILOGUE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 -; SCALAR_EPILOGUE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 -; SCALAR_EPILOGUE-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> 
[[TMP10]], <vscale x 16 x i8> [[TMP11]]) -; SCALAR_EPILOGUE-NEXT: [[TMP13:%.*]] = sext i32 [[TMP7]] to i64 -; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP13]] -; SCALAR_EPILOGUE-NEXT: [[TMP15:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP12]] -; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP15]]) -; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP14]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]]) -; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] +; SCALAR_EPILOGUE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 +; SCALAR_EPILOGUE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 +; SCALAR_EPILOGUE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]]) +; SCALAR_EPILOGUE-NEXT: [[TMP11:%.*]] = sext i32 [[TMP5]] to i64 +; SCALAR_EPILOGUE-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP11]] +; SCALAR_EPILOGUE-NEXT: [[TMP13:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP10]] +; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP13]]) +; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP12]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]]) +; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] ; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; SCALAR_EPILOGUE-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_EPILOGUE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SCALAR_EPILOGUE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALAR_EPILOGUE: middle.block: ; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -65,39 +63,37 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1023 ; PREDICATED_DATA-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] ; PREDICATED_DATA-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; PREDICATED_DATA-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_DATA-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4 ; PREDICATED_DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; PREDICATED_DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; PREDICATED_DATA-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i32> 
@llvm.stepvector.nxv16i32() -; PREDICATED_DATA-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP3]], i64 0 +; PREDICATED_DATA-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; PREDICATED_DATA-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0 ; PREDICATED_DATA-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PREDICATED_DATA-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_DATA: vector.body: ; PREDICATED_DATA-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_DATA-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_DATA-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 1024) -; PREDICATED_DATA-NEXT: [[TMP5:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_DATA-NEXT: [[TMP6:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_DATA-NEXT: [[TMP7:%.*]] = shl i32 [[INDEX]], 1 -; PREDICATED_DATA-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; PREDICATED_DATA-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]] -; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; PREDICATED_DATA-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP9]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison) +; PREDICATED_DATA-NEXT: [[TMP3:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_DATA-NEXT: [[TMP4:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> zeroinitializer +; PREDICATED_DATA-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 1 +; PREDICATED_DATA-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 +; PREDICATED_DATA-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP6]] +; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; PREDICATED_DATA-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8.p0(ptr [[TMP7]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK]], <vscale x 32 x i8> poison) ; PREDICATED_DATA-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[WIDE_MASKED_VEC]]) -; PREDICATED_DATA-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 -; PREDICATED_DATA-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 -; PREDICATED_DATA-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]]) -; PREDICATED_DATA-NEXT: [[TMP13:%.*]] = sext i32 [[TMP7]] to i64 -; PREDICATED_DATA-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP13]] -; PREDICATED_DATA-NEXT: [[TMP15:%.*]] = sub <vscale x 16 x i8> 
zeroinitializer, [[TMP12]] -; PREDICATED_DATA-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP15]]) -; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; PREDICATED_DATA-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP14]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]]) -; PREDICATED_DATA-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] +; PREDICATED_DATA-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 +; PREDICATED_DATA-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 +; PREDICATED_DATA-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]]) +; PREDICATED_DATA-NEXT: [[TMP11:%.*]] = sext i32 [[TMP5]] to i64 +; PREDICATED_DATA-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP11]] +; PREDICATED_DATA-NEXT: [[TMP13:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP10]] +; PREDICATED_DATA-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP13]]) +; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; PREDICATED_DATA-NEXT: call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr [[TMP12]], i32 1, <vscale x 32 x i1> [[INTERLEAVED_MASK3]]) +; PREDICATED_DATA-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP1]] ; PREDICATED_DATA-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_DATA-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; PREDICATED_DATA-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PREDICATED_DATA-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; PREDICATED_DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_DATA: middle.block: ; PREDICATED_DATA-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_DATA: scalar.ph: @@ -197,41 +193,39 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4 ; SCALAR_EPILOGUE-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP2]] ; SCALAR_EPILOGUE-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] -; SCALAR_EPILOGUE-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_EPILOGUE-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 4 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; SCALAR_EPILOGUE-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() -; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP4]], i64 0 +; SCALAR_EPILOGUE-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, 
i32 [[TMP2]], i64 0 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; SCALAR_EPILOGUE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_EPILOGUE: vector.body: ; SCALAR_EPILOGUE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_EPILOGUE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_EPILOGUE-NEXT: [[TMP7:%.*]] = shl i32 [[INDEX]], 2 -; SCALAR_EPILOGUE-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; SCALAR_EPILOGUE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]] -; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP9]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) +; SCALAR_EPILOGUE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALAR_EPILOGUE-NEXT: [[TMP4:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_EPILOGUE-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 2 +; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 +; SCALAR_EPILOGUE-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP6]] +; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP7]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) ; SCALAR_EPILOGUE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> [[WIDE_MASKED_VEC]]) -; SCALAR_EPILOGUE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 -; SCALAR_EPILOGUE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 -; SCALAR_EPILOGUE-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 -; SCALAR_EPILOGUE-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 +; SCALAR_EPILOGUE-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 +; SCALAR_EPILOGUE-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 +; SCALAR_EPILOGUE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 +; SCALAR_EPILOGUE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8> } [[STRIDED_VEC]], 3 +; SCALAR_EPILOGUE-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]]) +; SCALAR_EPILOGUE-NEXT: [[TMP13:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP12]] ; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]]) ; SCALAR_EPILOGUE-NEXT: [[TMP15:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP14]] -; SCALAR_EPILOGUE-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]]) -; SCALAR_EPILOGUE-NEXT: [[TMP17:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP16]] -; SCALAR_EPILOGUE-NEXT: [[TMP18:%.*]] = sext i32 [[TMP7]] to i64 -; SCALAR_EPILOGUE-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP18]] -; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]], <vscale x 16 x i8> [[TMP17]]) -; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP19]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) -; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] +; SCALAR_EPILOGUE-NEXT: [[TMP16:%.*]] = sext i32 [[TMP5]] to i64 +; SCALAR_EPILOGUE-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] +; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]]) +; SCALAR_EPILOGUE-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP17]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) +; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] ; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_EPILOGUE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALAR_EPILOGUE-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SCALAR_EPILOGUE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_EPILOGUE: middle.block: ; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -248,43 +242,41 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1023 ; PREDICATED_DATA-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] ; PREDICATED_DATA-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; PREDICATED_DATA-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_DATA-NEXT: [[TMP3:%.*]] = shl nuw i32 
[[TMP2]], 4 ; PREDICATED_DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0 ; PREDICATED_DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer -; PREDICATED_DATA-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() -; PREDICATED_DATA-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP3]], i64 0 +; PREDICATED_DATA-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32() +; PREDICATED_DATA-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0 ; PREDICATED_DATA-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PREDICATED_DATA-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_DATA: vector.body: ; PREDICATED_DATA-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_DATA-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDICATED_DATA-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 1024) -; PREDICATED_DATA-NEXT: [[TMP5:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; PREDICATED_DATA-NEXT: [[TMP6:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_DATA-NEXT: [[TMP7:%.*]] = shl i32 [[INDEX]], 2 -; PREDICATED_DATA-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; PREDICATED_DATA-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]] -; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; PREDICATED_DATA-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP9]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) +; PREDICATED_DATA-NEXT: [[TMP3:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; PREDICATED_DATA-NEXT: [[TMP4:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> zeroinitializer +; PREDICATED_DATA-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 2 +; PREDICATED_DATA-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 +; PREDICATED_DATA-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP6]] +; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; PREDICATED_DATA-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.masked.load.nxv64i8.p0(ptr [[TMP7]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK]], <vscale x 64 x i8> poison) ; PREDICATED_DATA-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> [[WIDE_MASKED_VEC]]) -; PREDICATED_DATA-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 -; PREDICATED_DATA-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 -; PREDICATED_DATA-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 -; PREDICATED_DATA-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 +; PREDICATED_DATA-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 0 +; PREDICATED_DATA-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 1 +; PREDICATED_DATA-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 2 +; PREDICATED_DATA-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[STRIDED_VEC]], 3 +; PREDICATED_DATA-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]]) +; PREDICATED_DATA-NEXT: [[TMP13:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP12]] ; PREDICATED_DATA-NEXT: [[TMP14:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]]) ; PREDICATED_DATA-NEXT: [[TMP15:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP14]] -; PREDICATED_DATA-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]]) -; PREDICATED_DATA-NEXT: [[TMP17:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP16]] -; PREDICATED_DATA-NEXT: [[TMP18:%.*]] = sext i32 [[TMP7]] to i64 -; PREDICATED_DATA-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP18]] -; PREDICATED_DATA-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]], <vscale x 16 x i8> [[TMP16]], <vscale x 16 x i8> [[TMP17]]) -; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1> [[TMP6]]) -; PREDICATED_DATA-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP19]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) -; PREDICATED_DATA-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] +; PREDICATED_DATA-NEXT: [[TMP16:%.*]] = sext i32 [[TMP5]] to i64 +; PREDICATED_DATA-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] +; PREDICATED_DATA-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv64i8(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]], <vscale x 16 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]]) +; PREDICATED_DATA-NEXT: [[INTERLEAVED_MASK3:%.*]] = call <vscale x 64 x i1> @llvm.vector.interleave4.nxv64i1(<vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[TMP4]]) +; PREDICATED_DATA-NEXT: call void @llvm.masked.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr [[TMP17]], i32 1, <vscale x 64 x i1> [[INTERLEAVED_MASK3]]) +; PREDICATED_DATA-NEXT: [[INDEX_NEXT]] = add nuw i32 
[[INDEX]], [[TMP1]] ; PREDICATED_DATA-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_DATA-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; PREDICATED_DATA-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PREDICATED_DATA-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; PREDICATED_DATA-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_DATA: middle.block: ; PREDICATED_DATA-NEXT: br label [[FOR_END:%.*]] ; PREDICATED_DATA: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index 946cb95..f731d39 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -15,7 +15,6 @@ define void @load_store(ptr %p) { ; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL1: vector.body: ; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -23,7 +22,7 @@ define void @load_store(ptr %p) { ; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP3]], align 8 ; LMUL1-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], splat (i64 1) ; LMUL1-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP3]], align 8 -; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL1: middle.block: @@ -48,12 +47,6 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: entry: ; LMUL2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL2: vector.ph: -; LMUL2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; LMUL2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 -; LMUL2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; LMUL2-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] -; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; LMUL2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 ; LMUL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -91,12 +84,6 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: entry: ; LMUL4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL4: vector.ph: -; LMUL4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; LMUL4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; LMUL4-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; LMUL4-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] -; LMUL4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; LMUL4-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; LMUL4-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 ; LMUL4-NEXT: br label [[VECTOR_BODY:%.*]] @@ -134,12 +121,6 @@ define void @load_store(ptr %p) { ; LMUL8-NEXT: entry: ; LMUL8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL8: vector.ph: -; LMUL8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; LMUL8-NEXT: [[TMP3:%.*]] = 
mul nuw i64 [[TMP2]], 8 -; LMUL8-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; LMUL8-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] -; LMUL8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; LMUL8-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; LMUL8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8 ; LMUL8-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index aad789c..3de3943 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -123,12 +123,6 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -185,12 +179,6 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP9]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 16, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP3]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -248,12 +236,6 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP9]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 32, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP3]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -310,12 +292,6 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 24, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index e5e694c..58506f7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -13,12 +13,6 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; VLENUNK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1 -; VLENUNK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP12]] -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index b28d801..f853cf1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -30,12 +30,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: -; RV32-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; RV32-NEXT: [[TMP2:%.*]] = sub i64 [[TMP4]], 1 -; RV32-NEXT: [[N_RND_UP:%.*]] = add i64 625, [[TMP2]] -; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] -; RV32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; RV32-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; RV32-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; RV32-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() @@ -110,12 +104,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV64: vector.ph: -; RV64-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; RV64-NEXT: [[TMP2:%.*]] = sub i64 [[TMP4]], 1 -; RV64-NEXT: [[N_RND_UP:%.*]] = add i64 625, [[TMP2]] -; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] -; RV64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; RV64-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; RV64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; RV64-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index 10af538..bbd78a4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -21,8 +21,6 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; 
V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; V-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; V-NEXT: br label [[VECTOR_BODY:%.*]] ; V: vector.body: ; V-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -35,7 +33,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; V-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]] ; V-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]] -; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; V: middle.block: @@ -56,8 +54,6 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; ZVQDOTQ-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] ; ZVQDOTQ: vector.body: ; ZVQDOTQ-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -70,7 +66,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]] ; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]]) -; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ZVQDOTQ: middle.block: @@ -185,8 +181,6 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; V-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; V-NEXT: br label [[VECTOR_BODY:%.*]] ; V: vector.body: ; V-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -199,7 +193,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; V-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]] ; V-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]] -; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; V: middle.block: @@ -220,8 +214,6 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; ZVQDOTQ-NEXT: [[TMP4:%.*]] = call i64 
@llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] ; ZVQDOTQ: vector.body: ; ZVQDOTQ-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -234,7 +226,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]] ; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]]) -; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ZVQDOTQ: middle.block: @@ -349,8 +341,6 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; V-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; V-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; V-NEXT: br label [[VECTOR_BODY:%.*]] ; V: vector.body: ; V-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -363,7 +353,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; V-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]] ; V-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]] -; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; V: middle.block: @@ -384,8 +374,6 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; ZVQDOTQ-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; ZVQDOTQ-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]] ; ZVQDOTQ: vector.body: ; ZVQDOTQ-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -398,7 +386,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]] ; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]]) -; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; ZVQDOTQ: middle.block: @@ -512,8 +500,6 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 
-; V-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; V-NEXT: br label [[VECTOR_BODY:%.*]]
 ; V: vector.body:
 ; V-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -526,7 +512,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; V-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
 ; V-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
 ; V-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
-; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; V: middle.block:
@@ -547,8 +533,6 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; ZVQDOTQ-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
 ; ZVQDOTQ: vector.body:
 ; ZVQDOTQ-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -561,7 +545,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
 ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
 ; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
-; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; ZVQDOTQ: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index cad15d1..c95adf2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -11,12 +11,6 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1001, [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index d41d47a..ca94fce 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -7,12 +7,6 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 9, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[B]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
index 39e865b..2028df7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
@@ -11,12 +11,6 @@ define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -79,12 +73,6 @@ define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -147,12 +135,6 @@ define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -215,12 +197,6 @@ define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -283,12 +259,6 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -354,12 +324,6 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -425,12 +389,6 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -491,12 +449,6 @@ define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "tar
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP12]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -683,12 +635,6 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -752,12 +698,6 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -821,12 +761,6 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -892,12 +826,6 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -961,12 +889,6 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1030,12 +952,6 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1241,12 +1157,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1313,12 +1223,6 @@ define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh"
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll
index a0d29e8..6413ded3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -p loop-vectorize -pass-remarks-analysis=loop-vectorize -S 2>&1 | FileCheck %s
 ; CHECK: remark: <unknown>:0:0: the cost-model indicates that interleaving is not beneficial
@@ -7,12 +8,6 @@ define float @s311(float %a_0, float %s311_sum) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 1200, [[TMP8]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[A_0]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 186573f..6e58fdf 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -21,12 +21,6 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-NEXT: [[ENTRY:.*]]:
 ; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV64: [[VECTOR_PH]]:
-; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; RV64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; RV64-NEXT: [[N_RND_UP:%.*]] = add i64 1023, [[TMP6]]
-; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; RV64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV64-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -72,12 +66,6 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[ENTRY:.*]]:
 ; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV32: [[VECTOR_PH]]:
-; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; RV32-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; RV32-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; RV32-NEXT: [[N_RND_UP:%.*]] = add i64 1023, [[TMP6]]
-; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; RV32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV32-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; RV32-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -124,14 +112,12 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]]
 ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2: [[VECTOR_PH]]:
-; RV64-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; RV64-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1023, [[TMP3]]
-; RV64-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1023, [[N_MOD_VF]]
 ; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]]
 ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]]
+; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]]
 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; RV64-UF2: [[VECTOR_BODY]]:
 ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -170,13 +156,13 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP21]])
 ; RV64-UF2-NEXT: store <vscale x 4 x i32> [[REVERSE4]], ptr [[TMP32]], align 4
 ; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; RV64-UF2-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; RV64-UF2-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; RV64-UF2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP7]]
+; RV64-UF2-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; RV64-UF2: [[MIDDLE_BLOCK]]:
-; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[N_VEC]]
+; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[TMP7]]
 ; RV64-UF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
 ; RV64-UF2: [[SCALAR_PH]]:
-; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP33]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
 ; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
 ; RV64-UF2: [[FOR_BODY]]:
 ;
@@ -230,12 +216,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64: [[VECTOR_PH]]:
-; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
-; RV64-NEXT: [[TMP19:%.*]] = sub i64 [[TMP16]], 1
-; RV64-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP19]]
-; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP16]]
-; RV64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV64-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -301,12 +281,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]]
 ; RV32-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV32: [[VECTOR_PH]]:
-; RV32-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; RV32-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
-; RV32-NEXT: [[TMP11:%.*]] = sub i64 [[TMP8]], 1
-; RV32-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP11]]
-; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
-; RV32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV32-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; RV32-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -386,15 +360,13 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2: [[VECTOR_PH]]:
-; RV64-UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
-; RV64-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP16]]
-; RV64-UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
 ; RV64-UF2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
+; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]]
 ; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
-; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
+; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]]
+; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP20]] to i32
 ; RV64-UF2-NEXT: [[TMP21:%.*]] = sub i32 [[N]], [[DOTCAST]]
 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; RV64-UF2: [[VECTOR_BODY]]:
@@ -436,13 +408,13 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2-NEXT: [[REVERSE7:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP36]])
 ; RV64-UF2-NEXT: store <vscale x 4 x i32> [[REVERSE7]], ptr [[TMP47]], align 4
 ; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
-; RV64-UF2-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; RV64-UF2-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; RV64-UF2-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP20]]
+; RV64-UF2-NEXT: br i1 [[TMP49]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; RV64-UF2: [[MIDDLE_BLOCK]]:
-; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[TMP20]]
 ; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
 ; RV64-UF2: [[SCALAR_PH]]:
-; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP20]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
 ; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
 ; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
 ; RV64-UF2: [[FOR_COND_CLEANUP_LOOPEXIT]]:
@@ -509,12 +481,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64: [[VECTOR_PH]]:
-; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
-; RV64-NEXT: [[TMP19:%.*]] = sub i64 [[TMP16]], 1
-; RV64-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP19]]
-; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP16]]
-; RV64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV64-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -580,12 +546,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]]
 ; RV32-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV32: [[VECTOR_PH]]:
-; RV32-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; RV32-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
-; RV32-NEXT: [[TMP11:%.*]] = sub i64 [[TMP8]], 1
-; RV32-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP11]]
-; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
-; RV32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV32-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; RV32-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -665,15 +625,13 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2: [[VECTOR_PH]]:
-; RV64-UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
-; RV64-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP16]]
-; RV64-UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
 ; RV64-UF2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
+; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]]
 ; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
-; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
+; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]]
+; RV64-UF2-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP20]] to i32
 ; RV64-UF2-NEXT: [[TMP21:%.*]] = sub i32 [[N]], [[DOTCAST]]
 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; RV64-UF2: [[VECTOR_BODY]]:
@@ -715,13 +673,13 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2-NEXT: [[REVERSE7:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP36]])
 ; RV64-UF2-NEXT: store <vscale x 4 x float> [[REVERSE7]], ptr [[TMP47]], align 4
 ; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
-; RV64-UF2-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; RV64-UF2-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; RV64-UF2-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP20]]
+; RV64-UF2-NEXT: br i1 [[TMP49]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; RV64-UF2: [[MIDDLE_BLOCK]]:
-; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[TMP20]]
 ; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
 ; RV64-UF2: [[SCALAR_PH]]:
-; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP20]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
 ; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
 ; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
 ; RV64-UF2: [[FOR_COND_CLEANUP_LOOPEXIT]]:
@@ -762,12 +720,6 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV64-NEXT: [[ENTRY:.*]]:
 ; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV64: [[VECTOR_PH]]:
-; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; RV64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; RV64-NEXT: [[N_RND_UP:%.*]] = add i64 1023, [[TMP6]]
-; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; RV64-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV64-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -813,12 +765,6 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[ENTRY:.*]]:
 ; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV32: [[VECTOR_PH]]:
-; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; RV32-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; RV32-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; RV32-NEXT: [[N_RND_UP:%.*]] = add i64 1023, [[TMP6]]
-; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; RV32-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; RV32-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; RV32-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; RV32-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -865,14 +811,12 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]]
 ; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2: [[VECTOR_PH]]:
-; RV64-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; RV64-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1023, [[TMP3]]
-; RV64-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1023, [[N_MOD_VF]]
 ; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]]
 ; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]]
+; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]]
 ; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; RV64-UF2: [[VECTOR_BODY]]:
 ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -911,13 +855,13 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP21]])
 ; RV64-UF2-NEXT: store <vscale x 4 x float> [[REVERSE4]], ptr [[TMP32]], align 4
 ; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; RV64-UF2-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; RV64-UF2-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; RV64-UF2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP7]]
+; RV64-UF2-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; RV64-UF2: [[MIDDLE_BLOCK]]:
-; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[N_VEC]]
+; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[TMP7]]
 ; RV64-UF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
 ; RV64-UF2: [[SCALAR_PH]]:
-; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP33]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
 ; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
 ; RV64-UF2: [[FOR_BODY]]:
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
index 770026e..10d74c0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
@@ -12,12 +12,6 @@ define void @test(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -131,12 +125,6 @@ define void @trivial_due_max_vscale(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -196,12 +184,6 @@ define void @no_high_lmul_or_interleave(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
index b8d9c28..8bfeac8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
@@ -10,12 +10,6 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -76,12 +70,6 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
@@ -180,12 +168,6 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -243,12 +225,6 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP7]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -314,12 +290,6 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP9]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -371,12 +341,6 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP9]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index c037b70..c807891 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -11,12 +11,6 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -76,12 +70,6 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -139,12 +127,6 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -210,12 +192,6 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -267,12 +243,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
@@ -334,8 +304,6 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -344,7 +312,7 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
 ; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK: middle.block:
@@ -387,12 +355,6 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
index 2d2d76a..384cba5d2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
@@ -7,12 +7,6 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -79,12 +73,6 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP13]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i64 0
@@ -151,12 +139,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP20]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -221,12 +203,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP20]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -291,12 +267,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP20]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -398,12 +368,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index f0a74fc..aa90c8b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -8,12 +8,6 @@ define void @single_constant_stride_int_scaled(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -81,12 +75,6 @@ define void @single_constant_stride_int_iv(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP10]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -157,12 +145,6 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -231,12 +213,6 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED: vector.ph:
-; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
-; NOSTRIDED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP4]], 1
-; NOSTRIDED-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -313,12 +289,6 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED: vector.ph:
-; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
-; NOSTRIDED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP4]], 1
-; NOSTRIDED-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -444,12 +414,6 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NOSTRIDED-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED: vector.ph:
-; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = sub i64 [[TMP8]], 1
-; NOSTRIDED-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP11]]
-; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
-; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -537,12 +501,6 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; STRIDED: vector.ph:
-; STRIDED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
-; STRIDED-NEXT: [[TMP42:%.*]] = sub i64 [[TMP9]], 1
-; STRIDED-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP42]]
-; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
-; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; STRIDED-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
 ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
@@ -618,12 +576,6 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
 ; NOSTRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED: vector.ph:
-; NOSTRIDED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
-; NOSTRIDED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP4]], 1
-; NOSTRIDED-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
-; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -740,12 +692,6 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; STRIDED: vector.ph:
-; STRIDED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
-; STRIDED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1
-; STRIDED-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP10]]
-; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
-; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
 ; STRIDED-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index 38e7832..f539ccf 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -22,12 +22,6 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -84,8 +78,6 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16
 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
 ; NO-VP: [[VECTOR_BODY]]:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -94,7 +86,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP10:%.*]] = and <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
 ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; NO-VP: [[MIDDLE_BLOCK]]:
@@ -148,12 +140,6 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -210,8 +196,6 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16
 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
 ; NO-VP: [[VECTOR_BODY]]:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -220,7 +204,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP10:%.*]] = or <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
 ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; NO-VP: [[MIDDLE_BLOCK]]:
@@ -274,12 +258,6 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -336,8 +314,6 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16
 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
 ; NO-VP: [[VECTOR_BODY]]:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -346,7 +322,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP10:%.*]] = xor <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
 ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; NO-VP: [[MIDDLE_BLOCK]]:
@@ -400,12 +376,6 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -462,8 +432,6 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16
 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
 ; NO-VP: [[VECTOR_BODY]]:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -472,7 +440,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP10:%.*]] = shl <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
 ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; NO-VP: [[MIDDLE_BLOCK]]:
@@ -526,12 +494,6 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -588,8 +550,6 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16
 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
 ; NO-VP: [[VECTOR_BODY]]:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -598,7 +558,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT: [[TMP10:%.*]] = lshr <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
 ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; NO-VP: [[MIDDLE_BLOCK]]:
@@ -652,12 +612,6 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -714,8
+668,6 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -724,7 +676,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = ashr <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -778,12 +730,6 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -840,8 +786,6 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -850,7 +794,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = add <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -904,12 +848,6 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 
-; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -966,8 +904,6 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -976,7 +912,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = sub <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 1) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1030,12 +966,6 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1092,8 +1022,6 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1102,7 +1030,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = mul <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP18:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1156,12 +1084,6 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1218,8 +1140,6 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1228,7 +1148,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1282,12 +1202,6 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1344,8 +1258,6 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1354,7 +1266,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = udiv <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3) ; NO-VP-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1408,12 +1320,6 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1470,8 +1376,6 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1480,7 +1384,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = srem <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1534,12 +1438,6 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 -; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1596,8 +1494,6 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 
@llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 16 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1606,7 +1502,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP10:%.*]] = urem <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 16 x i8> [[TMP10]], ptr [[TMP11]], align 1 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1663,12 +1559,6 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP8]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1726,8 +1616,6 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1736,7 +1624,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP12:%.*]] = fadd fast <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1791,12 +1679,6 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP8]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; 
IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1854,8 +1736,6 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1864,7 +1744,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP12:%.*]] = fsub fast <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1919,12 +1799,6 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP8]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1982,8 +1856,6 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1992,7 +1864,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP12:%.*]] = fmul fast <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -2047,12 +1919,6 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 
[[TMP2]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP8]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -2110,8 +1976,6 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -2120,7 +1984,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP12:%.*]] = fdiv fast <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -2228,12 +2092,6 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP8]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -2291,8 +2149,6 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -2301,7 +2157,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP12:%.*]] = fneg fast <vscale x 4 x float> [[WIDE_LOAD]] ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw 
i64 [[INDEX]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll index f604745..d2f3355 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll @@ -27,12 +27,6 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -99,8 +93,6 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -111,7 +103,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[WIDE_LOAD5]]) ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP16]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -176,12 +168,6 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -248,8 +234,6 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = mul 
nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -260,7 +244,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[WIDE_LOAD5]]) ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP16]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -325,12 +309,6 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP28]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -397,8 +375,6 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -409,7 +385,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[WIDE_LOAD5]]) ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP16]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -474,12 +450,6 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 4 -; IF-EVL-NEXT: 
[[TMP6:%.*]] = sub i64 [[TMP28]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -546,8 +516,6 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -558,7 +526,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[WIDE_LOAD5]]) ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP16]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -619,12 +587,6 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP23]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP23]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -682,8 +644,6 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -692,7 +652,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], i1 true) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; 
NO-VP: [[MIDDLE_BLOCK]]: @@ -748,12 +708,6 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 -; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -811,8 +765,6 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -821,7 +773,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], i1 true) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -877,12 +829,6 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP26]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP26]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -944,8 +890,6 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -956,7 +900,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP14:%.*]] = trunc <vscale x 4 x i64> [[TMP13]] to <vscale x 4 x i32> ; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr 
[[TMP15]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] ; NO-VP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1016,12 +960,6 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP25]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP26]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP26]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1083,8 +1021,6 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1095,7 +1031,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP14:%.*]] = trunc <vscale x 4 x i64> [[TMP13]] to <vscale x 4 x i32> ; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP15]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] ; NO-VP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -1155,12 +1091,6 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP23]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP23]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1218,8 +1148,6 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
%[[VECTOR_BODY]] ] @@ -1228,7 +1156,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]], i1 true) ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP13]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index 2be74e5..6db81b3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -22,12 +22,6 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; IF-EVL-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 -; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP9]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -85,8 +79,6 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -95,7 +87,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64> ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP10]], ptr [[TMP11]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -150,12 +142,6 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; IF-EVL-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 -; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP9]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]] -; 
IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -213,8 +199,6 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -223,7 +207,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = zext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64> ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP10]], ptr [[TMP11]], align 8, !alias.scope [[META12:![0-9]+]], !noalias [[META9]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -278,12 +262,6 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; IF-EVL-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: -; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 -; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP9]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2 ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -341,8 +319,6 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -351,7 +327,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP10:%.*]] = trunc <vscale x 2 x i64> [[WIDE_LOAD]] to <vscale x 2 x i32> ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; NO-VP-NEXT: store <vscale x 2 x i32> [[TMP10]], ptr [[TMP11]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META16]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP6]] ; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; NO-VP: [[MIDDLE_BLOCK]]: @@ -406,12 +382,6 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; 
IF-EVL-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
-; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP9]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -469,8 +439,6 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -479,7 +447,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP10:%.*]] = fpext <vscale x 2 x float> [[WIDE_LOAD]] to <vscale x 2 x double>
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 2 x double> [[TMP10]], ptr [[TMP11]], align 8, !alias.scope [[META26:![0-9]+]], !noalias [[META23]]
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP6]]
; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -534,12 +502,6 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; IF-EVL-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
-; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP9]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -597,8 +559,6 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -607,7 +567,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP10:%.*]] = fptrunc <vscale x 2 x double> [[WIDE_LOAD]] to <vscale x 2 x float>
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 2 x float> [[TMP10]], ptr [[TMP11]], align 4, !alias.scope [[META33:![0-9]+]], !noalias [[META30]]
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP6]]
; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -662,12 +622,6 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -725,8 +679,6 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -735,7 +687,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP12:%.*]] = sitofp <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x float>
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -790,12 +742,6 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -853,8 +799,6 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -863,7 +807,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP12:%.*]] = uitofp <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x float>
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP13]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -918,12 +862,6 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -981,8 +919,6 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -991,7 +927,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP12:%.*]] = fptosi <vscale x 4 x float> [[WIDE_LOAD]] to <vscale x 4 x i32>
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP13]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -1046,12 +982,6 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1109,8 +1039,6 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1119,7 +1047,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP12:%.*]] = fptoui <vscale x 4 x float> [[WIDE_LOAD]] to <vscale x 4 x i32>
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP13]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -1174,12 +1102,6 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1237,8 +1159,6 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1247,7 +1167,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP12:%.*]] = inttoptr <vscale x 2 x i64> [[WIDE_LOAD]] to <vscale x 2 x ptr>
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: store <vscale x 2 x ptr> [[TMP12]], ptr [[TMP13]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP10]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP8]]
; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -1293,12 +1213,6 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; IF-EVL-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -1353,12 +1267,10 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; NO-VP-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
+; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP3]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1369,7 +1281,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[TMP10:%.*]] = ptrtoint <vscale x 2 x ptr> [[TMP9]] to <vscale x 2 x i64>
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP10]], ptr [[TMP11]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
index 76a830a..0c67e47 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
@@ -23,12 +23,6 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: entry:
; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-OUTLOOP: vector.ph:
-; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -76,12 +70,6 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: entry:
; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
-; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
-; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
-; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
-; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
-; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -134,8 +122,6 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
@@ -146,7 +132,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3)
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-OUTLOOP-NEXT: [[TMP17]] = add <vscale x 4 x i32> [[TMP16]], [[VEC_PHI]]
-; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
@@ -184,8 +170,6 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-INLOOP: vector.body:
; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -196,7 +180,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[VEC_PHI]], [[TMP11]]
-; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP-INLOOP: middle.block:
@@ -246,12 +230,6 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: entry:
; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-OUTLOOP: vector.ph:
-; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -308,12 +286,6 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: entry:
; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
-; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
-; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
-; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
-; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
-; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -369,8 +341,6 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
@@ -381,7 +351,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3)
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[TMP16]], <vscale x 4 x i32> [[VEC_PHI]]
-; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
@@ -423,8 +393,6 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-INLOOP: vector.body:
; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -435,7 +403,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[VEC_PHI]], [[TMP11]]
-; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP-INLOOP: middle.block:
@@ -495,12 +463,6 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: entry:
; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-OUTLOOP: vector.ph:
-; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -557,12 +519,6 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: entry:
; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
-; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
@@ -624,13 +580,11 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
-; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
+; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32
; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -644,7 +598,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP27:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]]
; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select <vscale x 4 x i1> [[TMP27]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI]]
-; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -684,12 +638,10 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1)
; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
-; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32
+; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32
; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]]
; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -704,7 +656,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]])
; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[VEC_PHI]], [[TMP17]]
-; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -757,12 +709,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: entry:
; IF-EVL-OUTLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-OUTLOOP: vector.ph:
-; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -828,12 +774,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: entry:
; IF-EVL-INLOOP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
-; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
-; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
@@ -898,13 +838,11 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
-; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
+; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32
; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -918,7 +856,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP28:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]]
; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP28]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[VEC_PHI]]
-; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -962,12 +900,10 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1)
; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
-; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32
+; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32
; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]]
; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -982,7 +918,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]])
; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[VEC_PHI]], [[TMP17]]
-; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index a216aa8..0d1d9a9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -13,12 +13,6 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -71,8 +65,6 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -83,7 +75,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP8:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -134,12 +126,6 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -192,8 +178,6 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -204,7 +188,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP8:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -254,12 +238,6 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -312,8 +290,6 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -324,7 +300,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP8:%.*]] = srem <vscale x 2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -374,12 +350,6 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -432,8 +402,6 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -444,7 +412,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: [[TMP8:%.*]] = urem <vscale x 2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index f92bf5a..76afbd45d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -15,12 +15,6 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32
@@ -78,8 +72,6 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -94,7 +86,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], [[WIDE_LOAD]]
; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP14]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -147,12 +139,6 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32
@@ -218,8 +204,6 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -240,7 +224,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP17:%.*]] = add nsw <vscale x 4 x i32> [[TMP15]], [[TMP16]]
; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP17]], ptr [[TMP18]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -300,12 +284,6 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP8]] to i32
@@ -381,8 +359,6 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -410,7 +386,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[TMP21]], [[TMP18]]
; NO-VP-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP22]], ptr [[TMP23]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -486,8 +462,6 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -502,7 +476,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], [[WIDE_LOAD]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS]]
; IF-EVL-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP12]], align 4
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS]], [[TMP5]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS]], [[TMP3]]
; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -547,8 +521,6 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -563,7 +535,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], [[WIDE_LOAD]]
; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP14]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
@@ -621,12 +593,6 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: [[ENTRY:.*]]:
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
-; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP2]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2
; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
@@ -689,12 +655,10 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; NO-VP-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
+; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP3]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
@@ -710,7 +674,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; NO-VP-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP12]], i32 -1)
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP11]], align 8
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
index da5aed9..47339e0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
@@ -12,12 +12,6 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; IF-EVL-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -27,7 +21,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]]
@@ -75,12 +69,10 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; NO-VP-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
+; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP3]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
@@ -93,7 +85,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; NO-VP-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> [[TMP10]], i32 4, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> poison)
; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]]
; NO-VP-NEXT: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> [[WIDE_MASKED_GATHER2]], <vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> splat (i1 true))
-; NO-VP-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], [[TMP5]]
+; NO-VP-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], [[TMP3]]
; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; NO-VP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
index 433d1e4..f425010 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
@@ -14,19 +14,13 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -67,8 +61,6 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -77,7 +69,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT: [[TMP10]] = add i32 [[VEC_PHI]], [[TMP9]]
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP: middle.block:
@@ -229,19 +221,13 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -282,8 +268,6 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -292,7 +276,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT: [[TMP10]] = or i32 [[VEC_PHI]], [[TMP9]]
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP: middle.block:
@@ -337,19 +321,13 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -390,8 +368,6 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -400,7 +376,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT: [[TMP10]] = and i32 [[VEC_PHI]], [[TMP9]]
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP: middle.block:
@@ -445,19 +421,13 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -498,8 +468,6 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -508,7 +476,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT: [[TMP10]] = xor i32 [[VEC_PHI]], [[TMP9]]
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; NO-VP: middle.block:
@@ -553,19 +521,13 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -607,8 +569,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -617,7 +577,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP9]], i32 [[VEC_PHI]])
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; NO-VP: middle.block:
@@ -664,19 +624,13 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]],
[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -718,8 +672,6 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -728,7 +680,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: middle.block: @@ -775,19 +727,13 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -829,8 +775,6 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; 
NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -839,7 +783,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NO-VP: middle.block: @@ -886,19 +830,13 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -940,8 +878,6 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -950,7 +886,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP9]], i32 [[VEC_PHI]]) -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; NO-VP: middle.block: @@ 
-997,19 +933,13 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -1050,8 +980,6 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1060,7 +988,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP10]] = fadd reassoc float [[VEC_PHI]], [[TMP9]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NO-VP: middle.block: @@ -1212,19 +1140,13 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -1267,8 +1189,6 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1278,7 +1198,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NO-VP: middle.block: @@ -1325,19 +1245,13 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -1380,8 +1294,6 @@ define 
float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1391,7 +1303,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP9]], [[VEC_PHI]] ; NO-VP-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NO-VP: middle.block: @@ -1654,19 +1566,13 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -1712,8 +1618,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1725,7 +1629,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP11:%.*]] = fmul reassoc <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; NO-VP-NEXT: [[TMP12:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float 
-0.000000e+00, <vscale x 4 x float> [[TMP11]]) ; NO-VP-NEXT: [[TMP13]] = fadd reassoc float [[VEC_PHI]], [[TMP12]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; NO-VP: middle.block: @@ -1774,19 +1678,13 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -1831,8 +1729,6 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1841,7 +1737,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; NO-VP: middle.block: @@ -1891,19 +1787,13 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 
[[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) @@ -1948,8 +1838,6 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1958,7 +1846,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index c5d2739..8198409 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -13,18 +13,12 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP2]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer @@ -75,8 +69,6 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -88,7 +80,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; NO-VP-NEXT: [[TMP21:%.*]] = add nsw <vscale x 4 x i32> [[TMP19]], [[TMP18]] ; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP21]], ptr [[TMP22]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll index be6ae1d..84ea3b9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll @@ -31,12 +31,6 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; IF-EVL-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[ENTRY:%.*]] ; IF-EVL-OUTLOOP: vector.ph: -; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 -; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP7]] -; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 @@ -89,12 +83,6 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; IF-EVL-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; IF-EVL-INLOOP: vector.ph: -; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 -; 
IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1 -; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]] -; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]] -; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -152,8 +140,6 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 ; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -162,7 +148,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META0:![0-9]+]] ; NO-VP-OUTLOOP-NEXT: [[TMP12]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: @@ -207,8 +193,6 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: ; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -217,7 +201,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]] ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[VEC_PHI]], [[TMP11]] -; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; NO-VP-INLOOP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll index 62a4f73..acfad66 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll @@ -12,18 +12,12 @@ define void @iv32(ptr noalias %a, 
ptr noalias %b, i32 %N) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], [[TMP8]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[TMP11:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[TMP11:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP11]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) @@ -61,8 +55,6 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; NO-VP-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP11]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP2]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -70,7 +62,7 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP6]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP11]] ; NO-VP-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll index 296405d..60e0aab 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll @@ -16,12 +16,6 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TC]], i64 1) ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 
@llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -85,12 +79,6 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK: [[LOOP_PREHEADER]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -154,12 +142,6 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK: [[LOOP_PREHEADER]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC_ADD]], [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll index e06bbe9..7a39502 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll @@ -12,18 +12,12 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -72,8 +66,6 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 
[[TMP3]] ; NO-VP-NEXT: [[INC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] @@ -84,7 +76,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison) ; NO-VP-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_LOAD]] ; NO-VP-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP9]], ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]]) -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[INC]] ; NO-VP-NEXT: br i1 [[TMP10]], label [[FOR_INC:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll index 775d9ca..8142154 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll @@ -14,19 +14,13 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -66,8 +60,6 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N1]], [[TMP3]] ; NO-VP-NEXT: [[N:%.*]] = sub i64 [[N1]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ 
-75,7 +67,7 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[ARRAYIDX]], align 4 ; NO-VP-NEXT: [[ADD]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[SUM_07]], <vscale x 4 x float> [[WIDE_LOAD]]) -; NO-VP-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]] +; NO-VP-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]] ; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index 464667d..afbed37a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -12,12 +12,6 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 @@ -25,7 +19,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -67,8 +61,6 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -77,7 +69,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP10]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 
[[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: @@ -230,12 +222,6 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 @@ -243,7 +229,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -285,8 +271,6 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -295,7 +279,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; NO-VP: middle.block: @@ -341,12 +325,6 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0 @@ -354,7 +332,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -396,8 +374,6 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -406,7 +382,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP10]] = and <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP: middle.block: @@ -452,12 +428,6 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 @@ -465,7 +435,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; 
IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -507,8 +477,6 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -517,7 +485,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP10]] = xor <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; NO-VP: middle.block: @@ -563,12 +531,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 @@ -577,7 +539,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -621,8 +583,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: 
[[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -633,7 +593,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; NO-VP: middle.block: @@ -681,12 +641,6 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 @@ -695,7 +649,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -739,8 +693,6 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -751,7 +703,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load 
<vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NO-VP: middle.block: @@ -799,12 +751,6 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 @@ -813,7 +759,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -857,8 +803,6 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -869,7 +813,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; NO-VP: middle.block: @@ -917,12 +861,6 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 @@ -931,7 +869,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -975,8 +913,6 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -987,7 +923,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; NO-VP: middle.block: @@ -1035,12 +971,6 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] 
= urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 @@ -1048,7 +978,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -1090,8 +1020,6 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -1100,7 +1028,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP10]] = fadd reassoc <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NO-VP: middle.block: @@ -1253,12 +1181,6 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 @@ -1267,7 +1189,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], 
[[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -1311,8 +1233,6 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1323,7 +1243,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NO-VP: middle.block: @@ -1371,12 +1291,6 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 @@ -1385,7 +1299,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: 
[[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -1429,8 +1343,6 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1441,7 +1353,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast ogt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] ; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NO-VP: middle.block: @@ -1705,12 +1617,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 @@ -1718,7 +1624,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) @@ -1764,8 +1670,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 
@llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -1776,7 +1680,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4 ; NO-VP-NEXT: [[TMP12]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD1]], <vscale x 4 x float> [[VEC_PHI]]) -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; NO-VP: middle.block: @@ -1826,19 +1730,13 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -1883,8 +1781,6 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1893,7 +1789,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; NO-VP: middle.block: @@ -1943,19 +1839,13 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP6]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) @@ -2000,8 +1890,6 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -2010,7 +1898,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; NO-VP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 397cb95..3f378c7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -12,12 +12,6 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: 
vector.ph: -; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -81,8 +75,6 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] ; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] @@ -91,22 +83,22 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; NO-VP-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], -1 ; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP3]] +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1 ; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]] ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP10]] ; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4 ; NO-VP-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]]) ; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] -; NO-VP-NEXT: [[TMP17:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP3]] +; NO-VP-NEXT: [[TMP17:%.*]] = sub i64 [[TMP3]], 1 ; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] ; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP16]] ; NO-VP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP18]] ; NO-VP-NEXT: [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[REVERSE]]) ; NO-VP-NEXT: store <vscale x 4 x i32> [[REVERSE1]], ptr [[TMP20]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: @@ -154,12 +146,6 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 
[[TMP1]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -236,8 +222,6 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL1:%.*]], [[N_VEC]] ; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32 ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] @@ -250,8 +234,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 100) ; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP5]] -; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP3]] +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 ; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 -1, [[TMP13]] ; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP11]], i64 [[TMP12]] ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP14]] @@ -259,15 +243,15 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[REVERSE]], <vscale x 4 x i32> poison) ; NO-VP-NEXT: [[REVERSE2:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]]) ; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP5]] -; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[TMP18:%.*]] = mul i64 0, [[TMP3]] +; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP3]], 1 ; NO-VP-NEXT: [[TMP20:%.*]] = mul i64 -1, [[TMP19]] ; NO-VP-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP18]] ; NO-VP-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP20]] ; NO-VP-NEXT: [[REVERSE3:%.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> [[TMP10]]) ; NO-VP-NEXT: [[REVERSE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[REVERSE2]]) ; NO-VP-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[REVERSE4]], ptr [[TMP22]], i32 4, <vscale x 4 x i1> [[REVERSE3]]) -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; NO-VP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; NO-VP: middle.block: @@ -334,12 +318,6 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 -; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] 
= add i64 1025, [[TMP2]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index 2ec23b91..e32af06 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -15,12 +15,6 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP2]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -67,8 +61,6 @@ define void @test(ptr %p) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; NO-VP-NEXT: br label [[LOOP:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -77,7 +69,7 @@ define void @test(ptr %p) { ; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[IV]], 200 ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] ; NO-VP-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP8]], align 8 -; NO-VP-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]] +; NO-VP-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]] ; NO-VP-NEXT: [[TMP9:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: @@ -346,12 +338,6 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP2]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -398,8 +384,6 @@ define void @trivial_due_max_vscale(ptr %p) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; NO-VP-NEXT: br label [[LOOP:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ 
-408,7 +392,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; NO-VP-NEXT: [[TMP7:%.*]] = add i64 [[IV]], 8192 ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] ; NO-VP-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP8]], align 32 -; NO-VP-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP5]] +; NO-VP-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]] ; NO-VP-NEXT: [[TMP9:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP: middle.block: @@ -454,11 +438,6 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: entry: ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IF-EVL: vector.ph: -; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP1:%.*]] = sub i64 [[TMP7]], 1 -; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 3002, [[TMP1]] -; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index ab05166..aad4dcd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -15,12 +15,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 3, [[SPEC_SELECT]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP7]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 68d55d0..22c9b2e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -10,12 +10,6 @@ define void @test_pr98413_zext_removed(ptr %src, ptr noalias %dst, i64 %x) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 97, [[TMP9]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X]], i64 0 @@ -83,12 +77,6 @@ define void @test_pr98413_sext_removed(ptr %src, ptr noalias %dst, i64 %x) { ; CHECK-NEXT: 
[[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 97, [[TMP9]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X]], i64 0 @@ -157,12 +145,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 9, [[TMP12]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP10]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0 @@ -265,12 +247,6 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[V]], 1 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP4]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP13]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[N]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 01edeed..661fd28 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -11,12 +11,6 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 9, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll index 
6476373..c1fedd9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll @@ -20,12 +20,6 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count) ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP8]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x ptr> poison, ptr [[DSTV]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 722fe5e..ae894d1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -12,12 +12,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: [[ENTRY:.*]]: ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; SCALABLE: [[VECTOR_PH]]: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1 -; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP6]] -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -90,12 +84,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]: ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; TF-SCALABLE: [[VECTOR_PH]]: -; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -158,8 +146,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2 ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] ; SCALABLE: [[VECTOR_BODY]]: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -168,7 +154,7 @@ define i64 @uniform_load_outside_use(ptr noalias 
nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; SCALABLE: [[MIDDLE_BLOCK]]:
@@ -236,8 +222,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
 ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
-; TF-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
 ; TF-SCALABLE: [[VECTOR_BODY]]:
 ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -246,7 +230,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[ARRAYIDX]], align 8
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP5]]
+; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP3]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; TF-SCALABLE: [[MIDDLE_BLOCK]]:
@@ -290,12 +274,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; SCALABLE-NEXT: [[ENTRY:.*]]:
 ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; SCALABLE: [[VECTOR_PH]]:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP14]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
@@ -404,12 +382,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]:
 ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TF-SCALABLE: [[VECTOR_PH]]:
-; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
-; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
@@ -494,12 +466,6 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; SCALABLE-NEXT: [[ENTRY:.*]]:
 ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; SCALABLE: [[VECTOR_PH]]:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP7]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -572,12 +538,6 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]:
 ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TF-SCALABLE: [[VECTOR_PH]]:
-; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -633,12 +593,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; SCALABLE-NEXT: [[ENTRY:.*]]:
 ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; SCALABLE: [[VECTOR_PH]]:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP6]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
@@ -711,12 +665,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]:
 ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TF-SCALABLE: [[VECTOR_PH]]:
-; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
@@ -772,12 +720,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; SCALABLE-NEXT: [[ENTRY:.*]]:
 ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; SCALABLE: [[VECTOR_PH]]:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP9]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B]], i64 0
@@ -865,12 +807,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]:
 ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TF-SCALABLE: [[VECTOR_PH]]:
-; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B]], i64 0
@@ -937,12 +873,6 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; SCALABLE-NEXT: [[ENTRY:.*]]:
 ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; SCALABLE: [[VECTOR_PH]]:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP11:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP11]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
@@ -1045,12 +975,6 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]:
 ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TF-SCALABLE: [[VECTOR_PH]]:
-; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
@@ -1129,12 +1053,6 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT: [[ENTRY:.*]]:
 ; SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; SCALABLE: [[VECTOR_PH]]:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
-; SCALABLE-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
-; SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP6]]
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
@@ -1207,12 +1125,6 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: [[ENTRY:.*]]:
 ; TF-SCALABLE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; TF-SCALABLE: [[VECTOR_PH]]:
-; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
-; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
index d93a5c0..2400198 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
@@ -8,12 +8,6 @@ define void @foo(ptr %arg) #0 {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
index d3c3c6b..8d287fe 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
@@ -12,18 +12,12 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: entry:
 ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IF-EVL: vector.ph:
-; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
-; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP8]]
-; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
 ; IF-EVL: vector.body:
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -68,8 +62,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
 ; NO-VP: vector.body:
 ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -80,7 +72,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; NO-VP-NEXT: [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
 ; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP12]], align 4
-; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; NO-VP: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
index 31501f6..1f3bd45 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
@@ -17,12 +17,6 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP9]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP8]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP10]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll
index 4145967..864d221 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll
@@ -23,9 +23,6 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr
 ; CHECK: [[LOOP_3_PREHEADER]]:
 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[IV_1_LCSSA2]], 15
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[IV_1_LCSSA2]], 1
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index baa967c..53de252 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -13,7 +13,7 @@ define void @foo(ptr %h) !dbg !4 {
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg [[DBG21]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ], !dbg [[DBG222:![0-9]+]]
-; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
+; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]]
 ; CHECK: for.cond5.preheader1:
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG22:![0-9]+]]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
index 2bafa6c..b266ddf 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
@@ -15,7 +15,6 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) {
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
@@ -26,7 +25,7 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) {
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD]] = load <vscale x 1 x i64>, ptr [[TMP5]], align 8
 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -83,7 +82,6 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
@@ -96,7 +94,7 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP7]], ptr [[TMP8]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/histograms.ll b/llvm/test/Transforms/LoopVectorize/histograms.ll
index 1adc0bf..f0ceae7 100644
--- a/llvm/test/Transforms/LoopVectorize/histograms.ll
+++ b/llvm/test/Transforms/LoopVectorize/histograms.ll
@@ -1,25 +1,48 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -force-vector-width=2 -S | FileCheck %s
-;; Currently we don't expect this to vectorize, since the generic cost model returns
-;; invalid for the histogram intrinsic.
 define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) {
 ; CHECK-LABEL: define void @simple_histogram(
 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i64 1
+; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.v2p0.i32(<2 x ptr> [[TMP7]], i32 1, <2 x i1> splat (i1 true))
+; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: for.exit:
 ; CHECK-NEXT: ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
index 4fda9d3..31c3248 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
@@ -24,12 +24,10 @@ define void @foo() {
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
 ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP3]]
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -52,7 +50,7 @@ define void @foo() {
 ; CHECK: vector.latch:
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ]
 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index c5838fe..8d9ee2a 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -26,7 +26,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp
 ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16, !dbg [[DBG9]]
 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg [[DBG9]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644, !dbg [[DBG9]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg [[DBG9]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg [[DBG9]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index 8291164..83541f2 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
@@ -14,8 +14,6 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -39,7 +37,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -90,8 +88,6 @@ define void @test2(ptr %a, ptr noalias %b) {
 ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -113,7 +109,7 @@ define void @test2(ptr %a, ptr noalias %b) {
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4
 ; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
@@ -159,13 +155,11 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index bfc0a48..07402ab 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -36,8 +36,6 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]]
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
@@ -53,7 +51,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-VF4UF1-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[TMP20]]
 ; CHECK-VF4UF1-NEXT: store <vscale x 4 x i32> [[TMP22]], ptr [[TMP21]], align 4
-; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; CHECK-VF4UF1-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF4UF1: [[MIDDLE_BLOCK]]:
@@ -101,8 +99,6 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 8
 ; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
 ; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
@@ -128,7 +124,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP30]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP26]], ptr [[TMP25]], align 4
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP31]], align 4
-; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; CHECK-VF4UF2-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF4UF2: [[MIDDLE_BLOCK]]:
@@ -198,8 +194,6 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1
@@ -217,7 +211,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = icmp slt <vscale x 4 x i32> [[VEC_PHI]], [[TMP15]]
 ; CHECK-VF4UF1-NEXT: [[TMP17]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP15]]
-; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; CHECK-VF4UF1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF4UF1: [[MIDDLE_BLOCK]]:
@@ -259,8 +253,6 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
 ; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 4
 ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1
@@ -289,7 +281,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP24:%.*]] = icmp slt <vscale x 4 x i32> [[VEC_PHI1]], [[TMP22]]
 ; CHECK-VF4UF2-NEXT: [[TMP25]] = select <vscale x 4 x i1> [[TMP23]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP21]]
 ; CHECK-VF4UF2-NEXT: [[TMP26]] = select <vscale x 4 x i1> [[TMP24]], <vscale x 4 x i32> [[VEC_PHI1]], <vscale x 4 x i32> [[TMP22]]
-; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; CHECK-VF4UF2-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF4UF2: [[MIDDLE_BLOCK]]:
@@ -388,8 +380,6 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]]
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-VF4UF1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x double> poison, double [[CONV1]], i64 0
 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x double> [[BROADCAST_SPLATINSERT]], <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -411,7 +401,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF1-NEXT: [[TMP25:%.*]] = fsub fast <vscale x 4 x double> [[TMP22]], [[TMP24]]
 ; CHECK-VF4UF1-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]]
 ; CHECK-VF4UF1-NEXT: store <vscale x 4 x double> [[TMP25]], ptr [[TMP26]], align 8, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
-; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
+; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
 ; CHECK-VF4UF1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF4UF1: [[MIDDLE_BLOCK]]:
@@ -466,8 +456,6 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
 ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x double> poison, double [[CONV1]], i64 0
 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x double> [[BROADCAST_SPLATINSERT]], <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
@@ -502,7 +490,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
 ; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[TMP37]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP32]], ptr [[TMP34]], align 8, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP33]], ptr [[TMP38]], align 8, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
+; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
 ; CHECK-VF4UF2-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF4UF2: [[MIDDLE_BLOCK]]:
@@ -568,12 +556,10 @@ define i64 @constant_folded_previous_value() {
 ; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
-; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4UF1: [[VECTOR_BODY]]:
 ; CHECK-VF4UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF4UF1: [[MIDDLE_BLOCK]]:
@@ -596,12 +582,10 @@ define i64 @constant_folded_previous_value() {
 ; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
-; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
 ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4UF2: [[VECTOR_BODY]]:
 ; CHECK-VF4UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF4UF2: [[MIDDLE_BLOCK]]:
@@ -646,14 +630,12 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
 ; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4
 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP3]]
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]]
-; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4
 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1)
 ; CHECK-VF4UF1-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP7]]
-; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul i32 1, [[TMP5]]
+; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul i32 1, [[TMP3]]
 ; CHECK-VF4UF1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP8]], i64 0
 ; CHECK-VF4UF1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -661,7 +643,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
 ; CHECK-VF4UF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF1-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
 ; CHECK-VF4UF1-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
@@ -690,13 +672,11 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
 ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 96, [[TMP1]]
 ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK-VF4UF2: [[VECTOR_PH]]:
-; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 8
-; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP3]]
-; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]]
 ; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4
 ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
+; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP6]]
+; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]]
 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
@@ -778,8 +758,6 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
 ; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
@@ -797,7 +775,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF1-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 4 x i32> [[TMP17]], [[TMP16]]
 ; CHECK-VF4UF1-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-VF4UF1-NEXT: store <vscale x 4 x i32> [[TMP18]], ptr [[TMP19]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
-; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-VF4UF1-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF1-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK-VF4UF1: [[MIDDLE_BLOCK]]:
@@ -837,8 +815,6 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
 ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8
 ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
 ; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
@@ -868,7 +844,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
 ; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP29]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
 ; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP25]], ptr [[TMP30]], align 4, !alias.scope [[META20]], !noalias [[META17]]
-; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4UF2-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK-VF4UF2: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index 6264941..ce4592c 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -15,13 +15,11 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP4]], 1
 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
@@ -97,17 +95,15 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
@@ -122,7 +118,7 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP14]]
 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP11]], ptr [[TMP13]], align 8
 ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP12]], ptr [[TMP15]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[STEP_ADD]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -185,13 +181,11 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i32> [[TMP6]], splat (i32 1)
-; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i32
 ; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -201,7 +195,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP10]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -262,8 +256,6 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
 ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
 ; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[DOTCAST]], 2.000000e+00
 ; CHECK-NEXT: [[IND_END:%.*]] = fadd float [[TMP4]], 0.000000e+00
@@ -271,7 +263,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: [[TMP8:%.*]] = uitofp <vscale x 4 x i32> [[TMP7]] to <vscale x 4 x float>
 ; CHECK-NEXT: [[TMP9:%.*]] = fmul <vscale x 4 x float> [[TMP8]], splat (float 2.000000e+00)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = fadd <vscale x 4 x float> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = uitofp i64 [[TMP6]] to float
+; CHECK-NEXT: [[TMP12:%.*]] = uitofp i64 [[TMP3]] to float
 ; CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[TMP13]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
@@ -281,7 +273,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 4 x float> [[VEC_IND]], ptr [[TMP14]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd <vscale x 4 x float> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index 1ec2993..0467a58 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -12,13 +12,11 @@ define i32 @iv_live_out_wide(ptr %dst) {
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 2000, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
index 9cbf6b7..bf14c87 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
@@ -20,18 +20,16 @@ define void @test(ptr %d) {
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]])
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDEX]]
-; CHECK-NEXT: store <vscale x 2 x i32> splat (i32 100), ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 2 x i32> splat (i32 100), ptr [[TMP4]], align 8
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -42,7 +40,7 @@ define void @test(ptr %d) {
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]])
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT: store i32 100, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]])
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
@@ -90,18 +88,16 @@ define void @testloopvariant(ptr %d) {
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]])
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDEX]]
-; CHECK-NEXT: store <vscale x 2 x i32> splat (i32 100), ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 2 x i32> splat (i32 100), ptr [[TMP4]], align 8
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -110,10 +106,10 @@ define void @testloopvariant(ptr %d) {
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [1024 x i32], ptr [[ARR]], i32 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[ARR]], i32 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]])
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT: store i32 100, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]])
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
index f384d3c..901f228 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -12,8 +12,6 @@
 ; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl nuw i64 %[[VSCALE]], 2
 ; CHECKUF1-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX4]]
 ; CHECKUF1: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
-; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF1: %[[VSCALEX4:.*]] = shl nuw i64 %[[VSCALE]], 2
 ; CHECKUF1: vector.body:
 ; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@@ -41,8 +39,6 @@
 ; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl nuw i64 %[[VSCALE]], 3
 ; CHECKUF2-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX8]]
 ; CHECKUF2: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
-; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF2: %[[VSCALEX8:.*]] = shl nuw i64 %[[VSCALE]], 3
 ; CHECKUF2: vector.body:
 ; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
index a3a4c29..ffa2602 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
@@ -19,13 +19,11 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 256, [[TMP2]]
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 256)
-; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]]
+; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index ba337aa..c2ae92e 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -15,8 +15,6 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 16
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 256, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 256, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 16
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -38,7 +36,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT: [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
 ; CHECK-NEXT: [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
 ; CHECK-NEXT: [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
index 4b8ff86..4495ed6 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
@@ -13,8 +13,6 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP4:%.*]] = trunc <vscale x 4 x i32> [[BROADCAST_SPLAT]] to <vscale x 4 x i16>
@@ -23,7 +21,7 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: store <vscale x 4 x i16> [[TMP4]], ptr [[TMP5]], align 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
@@ -73,8 +71,6 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len)
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[ARG1:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP4:%.*]] = trunc <vscale x 4 x i32> [[BROADCAST_SPLAT]] to <vscale x 4 x i8>
@@ -86,7 +82,7 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len)
 ; CHECK-NEXT: [[TMP6:%.*]] = xor <vscale x 4 x i8> [[WIDE_LOAD]], [[TMP4]]
 ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i8> [[TMP6]], [[WIDE_LOAD]]
 ; CHECK-NEXT: store <vscale x 4 x i8> [[TMP7]], ptr [[TMP5]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 983f327..c86a404 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -152,9 +152,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5,
 ; VF8UF1-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
 ; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VF8UF1: [[VECTOR_PH]]:
-; VF8UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 7
-; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
-; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
 ; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; VF8UF1-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
@@ -239,9 +236,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5,
 ; VF8UF2-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
 ; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VF8UF2: [[VECTOR_PH]]:
-; VF8UF2-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 15
-; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
-; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
 ; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; VF8UF2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
@@ -383,9 +377,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5,
 ; VF16UF1-NEXT: [[TMP0:%.*]] = add nsw i64 [[N]], -2
 ; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VF16UF1: [[VECTOR_PH]]:
-; VF16UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 15
-; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
-; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
 ; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; VF16UF1-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
@@ -701,9 +692,6 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
 ; VF8UF1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VF8UF1: [[VECTOR_PH]]:
-; VF8UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 7
-; VF8UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
-; VF8UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; VF8UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
 ; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; VF8UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
@@ -817,9 +805,6 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
 ; VF8UF2-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; VF8UF2: [[VECTOR_PH]]:
-; VF8UF2-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 15
-; VF8UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
-; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]],
[[N_MOD_VF]] ; VF8UF2-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1 ; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; VF8UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer @@ -1014,9 +999,6 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF16UF1-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 ; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF16UF1: [[VECTOR_PH]]: -; VF16UF1-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 15 -; VF16UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 -; VF16UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; VF16UF1-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1 ; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; VF16UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll index d3f7794..4af9767 100644 --- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll @@ -45,8 +45,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP1]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP14]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; NO-VP-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -57,7 +55,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[TMP16:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] ; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP16]], ptr [[TMP9]], align 4 -; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP: middle.block: @@ -90,7 +88,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-DEF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-DEF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] ; NO-VP-DEF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-DEF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-DEF-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-DEF: vector.body: ; NO-VP-DEF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -101,7 +98,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-DEF-NEXT: [[TMP8:%.*]] = add nsw <vscale x 1 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] ; NO-VP-DEF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; NO-VP-DEF-NEXT: store <vscale x 1 x i32> [[TMP8]], ptr [[TMP9]], align 4 -; NO-VP-DEF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] +; 
NO-VP-DEF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; NO-VP-DEF-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-DEF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-VP-DEF: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 128594c..e779233 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -89,8 +89,8 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK: VPlan 'Final VPlan for VF={8},UF={1}' ; CHECK: ir-bb<vector.ph>: -; CHECK-NEXT: IR %n.mod.vf = urem i64 %n, 8 -; CHECK-NEXT: IR %n.vec = sub i64 %n, %n.mod.vf +; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%n>, ir<8> +; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%n>, vp<%n.mod.vf> ; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector ; CHECK-NEXT: EMIT vp<[[BROADCAST_0:%.+]]> = broadcast ir<0> ; CHECK-NEXT: EMIT vp<[[BROADCAST_1:%.+]]> = broadcast ir<1> @@ -109,7 +109,7 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK-NEXT: WIDEN store ir<%q>, ir<%y> ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[SCALAR_PHI]]>, ir<8> ; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%iv>, vp<[[BROADCAST_INC]]> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec> ; CHECK-NEXT: Successor(s): middle.block, vector.body entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index 46f9125..49d87a2 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -5,16 +5,15 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK: VPlan 'Final VPlan for VF={2},UF={1}' { ; CHECK-NEXT: Live-in ir<[[VF:.+]]> = VF ; CHECK-NEXT: Live-in ir<[[VFxUF:.+]]> = VF * UF -; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: ir<%0> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: ; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<vector.ph>: -; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, 2 -; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf -; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + ir<%n.vec> * ir<1> +; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%0>, ir<2> +; CHECK-NEXT: EMIT vp<[[VTC:%.+]]> = sub ir<%0>, vp<%n.mod.vf> +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: @@ -78,11 +77,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-EMPTY: ; CHECK-NEXT: default.2: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> ; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> ; CHECK-EMPTY: diff --git 
a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index d856387..d200359 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -62,18 +62,18 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; ; CHECK: Executing best plan with VF=8, UF=2 ; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' { -; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: ir<%and> = original trip-count +; CHECK-NEXT: Live-in ir<16> = VF * UF +; CHECK-NEXT: Live-in ir<%and> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: ; CHECK-NEXT: IR %and = and i64 %N, 15 ; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<vector.ph>: -; CHECK-NEXT: IR %n.mod.vf = urem i64 %and, 16 -; CHECK-NEXT: IR %n.vec = sub i64 %and, %n.mod.vf -; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%and> + ir<[[VTC]]> * ir<-1> -; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%A> + ir<[[VTC]]> * ir<1> +; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%and>, ir<16> +; CHECK-NEXT: EMIT vp<[[VTC:%.+]]> = sub ir<%and>, vp<%n.mod.vf> +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%and> + vp<[[VTC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%A> + vp<[[VTC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: @@ -88,7 +88,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<%and>, ir<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<%and>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]> ; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll index f40afbd..ff085fc 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll @@ -24,8 +24,6 @@ define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -47,7 +45,7 @@ define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP23:%.*]] = shl <vscale x 4 x i32> [[TMP11]], [[TMP18]] ; CHECK-NEXT: [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP12]], [[TMP19]] ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP23]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i1> splat (i1 true), ptr [[TMP21]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 
; CHECK: middle.block: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll index 405a26d..c649f29e 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll @@ -13,7 +13,7 @@ define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) { ; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16> -; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 8, <1 x i1> [[TMP0]]) ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32> ; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64> |