-rw-r--r--   llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp                                          | 22
-rw-r--r--   llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll  |  4
2 files changed, 21 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 0bceb70..8e05b01 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3131,6 +3131,9 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
          GroupSize == VectorRegWidth;
 }
 
+/// Returns true if \p VPValue is a narrow VPValue.
+static bool isAlreadyNarrow(VPValue *VPV) { return VPV->isLiveIn(); }
+
 void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
                                              unsigned VectorRegWidth) {
   using namespace llvm::VPlanPatternMatch;
@@ -3182,6 +3185,16 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
     if (InterleaveR->getStoredValues().empty())
       continue;
 
+    // Narrow interleave groups, if all operands are already matching narrow
+    // ops.
+    auto *Member0 = InterleaveR->getStoredValues()[0];
+    if (isAlreadyNarrow(Member0) &&
+        all_of(InterleaveR->getStoredValues(),
+               [Member0](VPValue *VPV) { return Member0 == VPV; })) {
+      StoreGroups.push_back(InterleaveR);
+      continue;
+    }
+
     // For now, we only support full interleave groups storing load interleave
     // groups.
     if (all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) {
@@ -3252,13 +3265,16 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   // Narrow operation tree rooted at store groups.
   for (auto *StoreGroup : StoreGroups) {
     VPValue *Res = nullptr;
-    if (auto *WideMember0 = dyn_cast<VPWidenRecipe>(
-            StoreGroup->getStoredValues()[0]->getDefiningRecipe())) {
+    VPValue *Member0 = StoreGroup->getStoredValues()[0];
+    if (isAlreadyNarrow(Member0)) {
+      Res = Member0;
+    } else if (auto *WideMember0 =
+                   dyn_cast<VPWidenRecipe>(Member0->getDefiningRecipe())) {
       for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
         WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
       Res = WideMember0;
     } else {
-      Res = NarrowOp(StoreGroup->getStoredValues()[0]);
+      Res = NarrowOp(Member0);
     }
 
     auto *S = new VPWidenStoreRecipe(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
index ea56fac..8cd850b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
@@ -216,8 +216,8 @@ define void @same_constant_store_interleave_group(i64 %x, ptr noalias %dst) {
 ; VF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; VF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]]
-; VF2-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP1]], align 8
-; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT:    store <2 x i64> zeroinitializer, ptr [[TMP1]], align 8
+; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
 ; VF2-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; VF2-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VF2:       [[MIDDLE_BLOCK]]:
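For reference, a minimal sketch of the scalar input the @same_constant_store_interleave_group check lines cover: both members of the factor-2 interleave group store the same live-in constant, so the whole group can be replaced by one narrow consecutive store. This is a reconstruction inferred from the VF2 check lines above (shl by 1, i64 stores of zero, trip count 100), not the verbatim test input; the value names %off.0, %gep.0 and so on are illustrative, and %x is kept only to match the signature shown in the diff.

define void @same_constant_store_interleave_group(i64 %x, ptr noalias %dst) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %off.0 = shl nsw i64 %iv, 1                                ; even member of the pair
  %gep.0 = getelementptr inbounds i64, ptr %dst, i64 %off.0
  store i64 0, ptr %gep.0, align 8                           ; both members store the same
  %off.1 = add nsw i64 %off.0, 1                             ; constant, enabling narrowing
  %gep.1 = getelementptr inbounds i64, ptr %dst, i64 %off.1
  store i64 0, ptr %gep.1, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 100
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

Previously this vectorized at VF2 into the interleaved <4 x i64> store with induction step 2; with isAlreadyNarrow recognizing that all stored values are the same live-in, it becomes the plain <2 x i64> store with step 1 shown in the updated check lines.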