diff options
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 21 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll | 170 |
2 files changed, 189 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4fcaf6d..43166c0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5608,6 +5608,7 @@ private: for (ScheduleBundle *Bundle : Bundles) { if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0) break; + SmallPtrSet<Value *, 4> ParentsUniqueUsers; // Need to search for the lane since the tree entry can be // reordered. auto *It = find(Bundle->getTreeEntry()->Scalars, In); @@ -5636,6 +5637,22 @@ private: Bundle->getTreeEntry()->isCopyableElement(In)) && "Missed TreeEntry operands?"); + bool IsNonSchedulableWithParentPhiNode = + Bundle->getTreeEntry()->doesNotNeedToSchedule() && + Bundle->getTreeEntry()->UserTreeIndex && + Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() && + Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() == + Instruction::PHI; + // Count the number of unique phi nodes, which are the parent for + // parent entry, and exit, if all the unique phis are processed. + if (IsNonSchedulableWithParentPhiNode) { + const TreeEntry *ParentTE = + Bundle->getTreeEntry()->UserTreeIndex.UserTE; + Value *User = ParentTE->Scalars[Lane]; + if (!ParentsUniqueUsers.insert(User).second) + break; + } + for (unsigned OpIdx : seq<unsigned>(Bundle->getTreeEntry()->getNumOperands())) if (auto *I = dyn_cast<Instruction>( @@ -5644,8 +5661,8 @@ private: << *I << "\n"); DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, Checked); } - // If parent node is schedulable, it will be handle correctly. - if (!Bundle->getTreeEntry()->doesNotNeedToSchedule()) + // If parent node is schedulable, it will be handled correctly. + if (!IsNonSchedulableWithParentPhiNode) break; It = std::find(std::next(It), Bundle->getTreeEntry()->Scalars.end(), In); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll new file mode 100644 index 0000000..9e96e93 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i) { +; CHECK-LABEL: define i64 @test( +; CHECK-SAME: ptr [[ARG1:%.*]], i64 [[ALLOCA_PROMOTED344:%.*]], i8 [[LOAD_311_I:%.*]], i1 [[LOAD1_I:%.*]]) { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[LOAD_311_I]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, i8 [[LOAD_311_I]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[ALLOCA_PROMOTED344]], i32 0 +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[BB]] ], [ [[TMP28:%.*]], %[[BB12_8_I:.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <8 x i8> [ zeroinitializer, %[[BB]] ], [ [[TMP29:%.*]], %[[BB12_8_I]] ] +; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[SPAM_EXIT:.*]], label %[[BB4_LR_PH_I:.*]] +; CHECK: [[BB4_LR_PH_I]]: +; CHECK-NEXT: br i1 true, label %[[BB3_I_I_PEEL:.*]], label %[[EGGS_EXIT_I_PEEL:.*]] +; CHECK: [[BB3_I_I_PEEL]]: +; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], splat (i64 1) +; CHECK-NEXT: [[LOAD4_I_I_PEEL:%.*]] = load i64, ptr [[ARG1]], align 8 +; CHECK-NEXT: [[SHL_I_I_PEEL:%.*]] = shl i64 [[LOAD4_I_I_PEEL]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 poison, i32 0> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[SHL_I_I_PEEL]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: br label %[[EGGS_EXIT_I_PEEL]] +; CHECK: [[EGGS_EXIT_I_PEEL]]: +; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i64> [ [[TMP10]], %[[BB3_I_I_PEEL]] ], [ zeroinitializer, %[[BB4_LR_PH_I]] ] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0> +; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: br label %[[SPAM_EXIT]] +; CHECK: [[SPAM_EXIT]]: +; CHECK-NEXT: [[GETELEMENTPTR_I_I_PROMOTED346:%.*]] = phi i64 [ [[TMP14]], %[[EGGS_EXIT_I_PEEL]] ], [ 0, %[[BB2]] ] +; CHECK-NEXT: [[LOAD_8_I:%.*]] = phi i8 [ 0, %[[EGGS_EXIT_I_PEEL]] ], [ 1, %[[BB2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP13]], %[[EGGS_EXIT_I_PEEL]] ], [ zeroinitializer, %[[BB2]] ] +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[BB12_8_I]], label %[[BB12_1_THREAD_I:.*]] +; CHECK: [[BB12_1_THREAD_I]]: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0 +; CHECK-NEXT: [[ICMP5_3_I:%.*]] = icmp eq i8 [[TMP17]], 0 +; CHECK-NEXT: br i1 [[ICMP5_3_I]], label %[[BB12_3_I:.*]], label %[[BB8_3_I:.*]] +; CHECK: [[BB8_3_I]]: +; CHECK-NEXT: br label %[[BB12_3_I]] +; CHECK: [[BB12_3_I]]: +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i8> [[TMP4]], i32 1 +; CHECK-NEXT: [[ICMP5_4_I:%.*]] = icmp eq i8 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[ICMP5_4_I]], label %[[BB12_4_I:.*]], label %[[BB8_4_I:.*]] +; CHECK: [[BB8_4_I]]: +; CHECK-NEXT: br label %[[BB12_4_I]] +; CHECK: [[BB12_4_I]]: +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP4]], i32 2 +; CHECK-NEXT: [[ICMP5_5_I:%.*]] = icmp eq i8 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[ICMP5_5_I]], label %[[BB12_5_I:.*]], label %[[BB8_5_I:.*]] +; CHECK: [[BB8_5_I]]: +; CHECK-NEXT: br label %[[BB12_5_I]] +; CHECK: [[BB12_5_I]]: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i8> [[TMP4]], i32 3 +; CHECK-NEXT: [[ICMP5_7_I:%.*]] = icmp eq i8 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[ICMP5_7_I]], label %[[BB12_7_I:.*]], label %[[BB8_7_I:.*]] +; CHECK: [[BB8_7_I]]: +; CHECK-NEXT: br label %[[BB12_7_I]] +; CHECK: [[BB12_7_I]]: +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP4]], i32 4 +; CHECK-NEXT: [[ICMP5_8_I:%.*]] = icmp eq i8 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[ICMP5_8_I]], label %[[BB12_8_I]], label %[[BB8_8_I:.*]] +; CHECK: [[BB8_8_I]]: +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[LOAD_8_I]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i8> poison, i8 [[LOAD_8_I]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <4 x i32> <i32 poison, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT: br label %[[BB12_8_I]] +; CHECK: [[BB12_8_I]]: +; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i8> [ [[TMP0]], %[[BB12_7_I]] ], [ [[TMP22]], %[[BB8_8_I]] ], [ [[TMP15]], %[[SPAM_EXIT]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ zeroinitializer, %[[BB12_7_I]] ], [ [[TMP25]], %[[BB8_8_I]] ], [ [[TMP16]], %[[SPAM_EXIT]] ] +; CHECK-NEXT: [[TMP28]] = insertelement <2 x i64> [[TMP2]], i64 [[GETELEMENTPTR_I_I_PROMOTED346]], i32 1 +; CHECK-NEXT: [[TMP29]] = shufflevector <4 x i8> [[TMP26]], <4 x i8> [[TMP27]], <8 x i32> <i32 2, i32 7, i32 5, i32 0, i32 1, i32 3, i32 4, i32 6> +; CHECK-NEXT: br label %[[BB2]] +; +bb: + br label %bb2 + +bb2: + %getelementptr.i.i.promoted = phi i64 [ 0, %bb ], [ %getelementptr.i.i.promoted346, %bb12.8.i ] + %alloca.promoted = phi i64 [ 0, %bb ], [ %alloca.promoted344, %bb12.8.i ] + %load.8.i231 = phi i8 [ 0, %bb ], [ %load.8.i239, %bb12.8.i ] + %load.7.i217 = phi i8 [ 0, %bb ], [ %load.7.i225, %bb12.8.i ] + %load.626.i200 = phi i8 [ 0, %bb ], [ %load.626.i208, %bb12.8.i ] + %load.6.i183 = phi i8 [ 0, %bb ], [ %load.6.i191, %bb12.8.i ] + %load.5.i167 = phi i8 [ 0, %bb ], [ %load.5.i175, %bb12.8.i ] + %load.418.i148 = phi i8 [ 0, %bb ], [ %load.418.i156, %bb12.8.i ] + %load.4.i129 = phi i8 [ 0, %bb ], [ %load.4.i137, %bb12.8.i ] + %load.3.i111 = phi i8 [ 0, %bb ], [ %load.3.i119, %bb12.8.i ] + br i1 %load1.i, label %spam.exit, label %bb4.lr.ph.i + +bb4.lr.ph.i: + br i1 true, label %bb3.i.i.peel, label %eggs.exit.i.peel + +bb3.i.i.peel: + %and.i.i.peel = and i64 %alloca.promoted, 1 + %load4.i.i.peel = load i64, ptr %arg1, align 8 + %shl.i.i.peel = shl i64 %load4.i.i.peel, 1 + %or.i.i.peel = or i64 %shl.i.i.peel, %and.i.i.peel + %and6.i.i.peel = and i64 %getelementptr.i.i.promoted, 1 + %xor.i.i.peel = xor i64 %and6.i.i.peel, %alloca.promoted + br label %eggs.exit.i.peel + +eggs.exit.i.peel: + %load5.i.i93.peel = phi i64 [ %xor.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ] + %or.i.i91.peel = phi i64 [ %or.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ] + %0 = trunc i64 %or.i.i91.peel to i8 + %1 = trunc nuw i64 %or.i.i91.peel to i8 + %2 = trunc i64 %load5.i.i93.peel to i8 + br label %spam.exit + +spam.exit: + %getelementptr.i.i.promoted346 = phi i64 [ %load5.i.i93.peel, %eggs.exit.i.peel ], [ 0, %bb2 ] + %load.834.i = phi i8 [ %2, %eggs.exit.i.peel ], [ 0, %bb2 ] + %load.7.i25 = phi i8 [ %1, %eggs.exit.i.peel ], [ 0, %bb2 ] + %load.8.i = phi i8 [ 0, %eggs.exit.i.peel ], [ 1, %bb2 ] + %load.6.i18 = phi i8 [ %0, %eggs.exit.i.peel ], [ 0, %bb2 ] + br i1 %load1.i, label %bb12.8.i, label %bb12.1.thread.i + +bb12.1.thread.i: + %icmp5.3.i = icmp eq i8 %load.3.i111, 0 + br i1 %icmp5.3.i, label %bb12.3.i, label %bb8.3.i + +bb8.3.i: + br label %bb12.3.i + +bb12.3.i: + %icmp5.4.i = icmp eq i8 %load.4.i129, 0 + br i1 %icmp5.4.i, label %bb12.4.i, label %bb8.4.i + +bb8.4.i: + br label %bb12.4.i + +bb12.4.i: + %icmp5.5.i = icmp eq i8 %load.5.i167, 0 + br i1 %icmp5.5.i, label %bb12.5.i, label %bb8.5.i + +bb8.5.i: + br label %bb12.5.i + +bb12.5.i: + %icmp5.7.i = icmp eq i8 %load.7.i217, 0 + br i1 %icmp5.7.i, label %bb12.7.i, label %bb8.7.i + +bb8.7.i: + br label %bb12.7.i + +bb12.7.i: + %icmp5.8.i = icmp eq i8 %load.8.i231, 0 + br i1 %icmp5.8.i, label %bb12.8.i, label %bb8.8.i + +bb8.8.i: + br label %bb12.8.i + +bb12.8.i: + %load.8.i239 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.834.i, %spam.exit ] + %load.7.i225 = phi i8 [ 0, %bb12.7.i ], [ %load.311.i, %bb8.8.i ], [ %load.7.i25, %spam.exit ] + %load.626.i208 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.6.i191 = phi i8 [ %load.311.i, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.5.i175 = phi i8 [ 0, %bb12.7.i ], [ %load.6.i183, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.418.i156 = phi i8 [ 0, %bb12.7.i ], [ %load.626.i200, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.4.i137 = phi i8 [ 0, %bb12.7.i ], [ %load.418.i148, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + %load.3.i119 = phi i8 [ 0, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ] + br label %bb2 +} |
