From fa2bbea14df3273b3403f34cc295c56233fdbd0d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 13:10:16 -0700 Subject: Revert "[SLP]Improve minbitwidth analysis for operands of IToFP and ICmp instructions." This reverts commit 899855d2b11856a44e530fffe854d76be69b9008 to fix the issue reported in https://lab.llvm.org/buildbot/#/builders/165/builds/51659. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 50 +++++----------------- .../SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll | 4 +- .../X86/minbitwidth-node-with-multi-users.ll | 10 ++--- 3 files changed, 16 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 779c7b7..9b87e6e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1107,7 +1107,7 @@ public: MinBWs.clear(); ReductionBitWidth = 0; CastMaxMinBWSizes.reset(); - ExtraBitWidthNodes.clear(); + TruncNodes.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -3683,9 +3683,8 @@ private: /// type sizes, used in the tree. std::optional> CastMaxMinBWSizes; - /// Indices of the vectorized nodes, which supposed to be the roots of the new - /// bitwidth analysis attempt, like trunc, IToFP or ICmp. - DenseSet ExtraBitWidthNodes; + /// Indices of the vectorized trunc nodes. + DenseSet TruncNodes; }; } // end namespace slpvectorizer @@ -6613,18 +6612,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, PrevMaxBW), std::min(DL->getTypeSizeInBits(VL0->getType()), PrevMinBW)); - ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); - } else if (ShuffleOrOp == Instruction::SIToFP || - ShuffleOrOp == Instruction::UIToFP) { - unsigned NumSignBits = - ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); - if (auto *OpI = dyn_cast(VL0->getOperand(0))) { - APInt Mask = DB->getDemandedBits(OpI); - NumSignBits = std::max(NumSignBits, Mask.countl_zero()); - } - if (NumSignBits * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) - ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); + TruncNodes.insert(VectorizableTree.size()); } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); @@ -6672,18 +6660,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); - if (ShuffleOrOp == Instruction::ICmp) { - unsigned NumSignBits0 = - ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); - if (NumSignBits0 * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) - ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx); - unsigned NumSignBits1 = - ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT); - if (NumSignBits1 * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(1)->getType())) - ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx); - } return; } case Instruction::Select: @@ -14326,8 +14302,7 @@ void BoUpSLP::computeMinimumValueSizes() { bool IsStoreOrInsertElt = VectorizableTree.front()->getOpcode() == Instruction::Store || VectorizableTree.front()->getOpcode() == Instruction::InsertElement; - if ((IsStoreOrInsertElt || UserIgnoreList) && - ExtraBitWidthNodes.size() <= 1 && + if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) return; @@ -14531,21 +14506,16 @@ void BoUpSLP::computeMinimumValueSizes() { IsTopRoot = false; IsProfitableToDemoteRoot = true; - if (ExtraBitWidthNodes.empty()) { + if (TruncNodes.empty()) { NodeIdx = VectorizableTree.size(); } else { unsigned NewIdx = 0; do { - NewIdx = *ExtraBitWidthNodes.begin(); - ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin()); - } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty()); + NewIdx = *TruncNodes.begin() + 1; + TruncNodes.erase(TruncNodes.begin()); + } while (NewIdx <= NodeIdx && !TruncNodes.empty()); NodeIdx = NewIdx; - IsTruncRoot = any_of( - VectorizableTree[NewIdx]->UserTreeIndices, [](const EdgeInfo &EI) { - return EI.EdgeIdx == 0 && - EI.UserTE->getOpcode() == Instruction::ICmp && - !EI.UserTE->isAltShuffle(); - }); + IsTruncRoot = true; } // If the maximum bit width we compute is less than the with of the roots' diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index e1fd8a7..fc28d7a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -19,8 +19,8 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> , <2 x i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> -; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], ; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> , <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll index 668d3c3..136ab64 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll @@ -10,14 +10,12 @@ define void @test() { ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) -- cgit v1.1