-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp                                         | 22
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll |  4
2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 0bceb70..8e05b01 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3131,6 +3131,9 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
          GroupSize == VectorRegWidth;
 }
 
+/// Returns true if \p VPV is a narrow VPValue.
+static bool isAlreadyNarrow(VPValue *VPV) { return VPV->isLiveIn(); }
+
 void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
                                              unsigned VectorRegWidth) {
   using namespace llvm::VPlanPatternMatch;
@@ -3182,6 +3185,16 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
     if (InterleaveR->getStoredValues().empty())
       continue;
 
+    // Narrow interleave groups, if all operands are already matching narrow
+    // ops.
+    auto *Member0 = InterleaveR->getStoredValues()[0];
+    if (isAlreadyNarrow(Member0) &&
+        all_of(InterleaveR->getStoredValues(),
+               [Member0](VPValue *VPV) { return Member0 == VPV; })) {
+      StoreGroups.push_back(InterleaveR);
+      continue;
+    }
+
     // For now, we only support full interleave groups storing load interleave
     // groups.
     if (all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) {
@@ -3252,13 +3265,16 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   // Narrow operation tree rooted at store groups.
   for (auto *StoreGroup : StoreGroups) {
     VPValue *Res = nullptr;
-    if (auto *WideMember0 = dyn_cast<VPWidenRecipe>(
-            StoreGroup->getStoredValues()[0]->getDefiningRecipe())) {
+    VPValue *Member0 = StoreGroup->getStoredValues()[0];
+    if (isAlreadyNarrow(Member0)) {
+      Res = Member0;
+    } else if (auto *WideMember0 =
+                   dyn_cast<VPWidenRecipe>(Member0->getDefiningRecipe())) {
       for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
         WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
       Res = WideMember0;
     } else {
-      Res = NarrowOp(Member0);
+      Res = NarrowOp(Member0);
     }
 
     auto *S = new VPWidenStoreRecipe(
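
With this change, a store interleave group whose stored values are all the same already-narrow (live-in) value takes an early exit into StoreGroups, and the narrowing loop later reuses that value directly instead of looking through a VPWidenRecipe. The following standalone sketch (plain C++, not LLVM code; the Value type and its isLiveIn method are hypothetical stand-ins for the VPValue API) models just that qualifying condition:

#include <algorithm>
#include <vector>

// Hypothetical stand-in for VPValue; models only the isLiveIn() query.
struct Value {
  bool LiveIn = false;
  bool isLiveIn() const { return LiveIn; }
};

// Mirrors isAlreadyNarrow() from the patch: live-ins need no narrowing.
static bool isAlreadyNarrow(const Value *VPV) { return VPV->isLiveIn(); }

// Mirrors the new early-exit condition: every stored value is the same,
// already-narrow value, so the group can be queued for narrowing as-is.
static bool allStoredValuesAlreadyNarrow(const std::vector<Value *> &Stored) {
  if (Stored.empty())
    return false;
  const Value *Member0 = Stored.front();
  return isAlreadyNarrow(Member0) &&
         std::all_of(Stored.begin(), Stored.end(),
                     [Member0](const Value *V) { return V == Member0; });
}

int main() {
  Value Zero{/*LiveIn=*/true}; // e.g. the constant 0 both members store
  std::vector<Value *> Group{&Zero, &Zero};
  return allStoredValuesAlreadyNarrow(Group) ? 0 : 1; // exits 0: qualifies
}
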
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
index ea56fac..8cd850b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory.ll
@@ -216,8 +216,8 @@ define void @same_constant_store_interleave_group(i64 %x, ptr noalias %dst) {
 ; VF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
 ; VF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]]
-; VF2-NEXT:    store <4 x i64> zeroinitializer, ptr [[TMP1]], align 8
-; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT:    store <2 x i64> zeroinitializer, ptr [[TMP1]], align 8
+; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
 ; VF2-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; VF2-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VF2:       [[MIDDLE_BLOCK]]:
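
The updated checks are internally consistent: the narrowed body performs one <2 x i64> store per iteration and steps the induction by 1 instead of 2, so it covers the same 200 consecutive i64 elements as the old <4 x i64> store stepping by 2 before exiting at 100. A standalone sketch (plain C++, not part of the patch; the marker value 1 and the array size are illustrative assumptions, with offsets and bounds taken from the checks above) of that equivalence:

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<int64_t, 200> Wide{}, Narrow{};

  // Old body: base offset = Index << 1, 4 consecutive lanes, Index += 2.
  for (int64_t Index = 0; Index != 100; Index += 2)
    for (int64_t Lane = 0; Lane != 4; ++Lane)
      Wide[(Index << 1) + Lane] = 1;

  // New body: base offset = Index << 1, 2 consecutive lanes, Index += 1.
  for (int64_t Index = 0; Index != 100; Index += 1)
    for (int64_t Lane = 0; Lane != 2; ++Lane)
      Narrow[(Index << 1) + Lane] = 1;

  assert(Wide == Narrow); // both bodies write the same 200 elements
  return 0;
}
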