diff options
4 files changed, 18 insertions, 35 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f8e878c..878db52 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -596,36 +596,11 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { if (!PhiR) continue; - // Try to narrow wide and replicating recipes to uniform recipes, based on - // VPlan analysis. - // TODO: Apply to all recipes in the future, to replace legacy uniformity - // analysis. - auto Users = collectUsersRecursively(PhiR); - for (VPUser *U : reverse(Users)) { - auto *Def = dyn_cast<VPSingleDefRecipe>(U); - auto *RepR = dyn_cast<VPReplicateRecipe>(U); - // Skip recipes that shouldn't be narrowed. - if (!isa<VPReplicateRecipe, VPWidenRecipe>(Def) || !Def || - Def->getNumUsers() == 0 || !Def->getUnderlyingValue() || - (RefR && (RepR->isUniform() || RepRr->isPredicated()))) - continue; - - // Skip recipes that may have other lanes than their first used. - if (!vputils::isUniformAfterVectorization(Def) && - !vputils::onlyFirstLaneUsed(Def)) - continue; - - auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), - Def->operands(), /*IsUniform*/ true); - Clone->insertAfter(Def); - Def->replaceAllUsesWith(Clone); - } - // Check if any uniform VPReplicateRecipes using the phi recipe are used by // ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to // ensure the final value is available. // TODO: Remove once uniformity analysis is done on VPlan. - for (VPUser *U : Users) { + for (VPUser *U : collectUsersRecursively(PhiR)) { auto *ExitIRI = dyn_cast<VPIRInstruction>(U); VPValue *Op; if (!ExitIRI || !match(ExitIRI->getOperand(0), diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 2c37593..9e5c6e1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -132,6 +132,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[CONV6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -140,9 +142,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = icmp ule <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[CONV6]], i64 1 -; CHECK-NEXT: [[TMP26:%.*]] = sdiv i64 [[M]], [[TMP25]] +; CHECK-NEXT: [[TMP24:%.*]] = select <vscale x 2 x i1> [[TMP23]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> splat (i64 1) +; CHECK-NEXT: [[TMP25:%.*]] = sdiv <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <vscale x 2 x i64> [[TMP25]], i32 0 ; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32 ; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP26]], [[CONV61]] ; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP21]], [[TMP28]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll index e22b090..a90b4df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll @@ -12,12 +12,18 @@ define void @gep_use_in_dead_block(ptr noalias %dst, ptr %src) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10) ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP12]], i32 2, <4 x i1> [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll index 358293f..f4c099f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll @@ -3,12 +3,12 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; CHECK-LABEL: @test1( ; CHECK: vector.body: -; CHECK: [[E1:%.*]] = extractelement <vscale x 2 x float> {{.+}}, i32 0 -; CHECK-NEXT: [[FCMP1:%.*]] = fcmp ogt float [[E1]] -; CHECK-NEXT: [[E2:%.*]] = extractelement <vscale x 2 x float> {{.+}}, i32 0 -; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt float [[E2]] -; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP1]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP2]]) +; CHECK: [[FCMP1:%.*]] = fcmp ogt <vscale x 2 x float> +; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt <vscale x 2 x float> +; CHECK-NEXT: [[FCMP1L0:%.*]] = extractelement <vscale x 2 x i1> [[FCMP1]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP1L0]]) +; CHECK-NEXT: [[FCMP2L0:%.*]] = extractelement <vscale x 2 x i1> [[FCMP2]], i32 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP2L0]]) entry: br label %for.body |