author     Martin Storsjö <martin@martin.st>    2024-04-06 18:17:39 +0300
committer  Martin Storsjö <martin@martin.st>    2024-04-06 23:53:26 +0300
commit     bd9486b4ec7dc24f73f32474fa38b522a7cce085 (patch)
tree       3b37d11866f14cdf7eb0b7d62e04c698b1280344 /llvm/lib
parent     d38bff460acb4fe3156d90ec739da49344db14ca (diff)
Revert "[SLP]Improve minbitwidth analysis for abs/smin/smax/umin/umax intrinsics."
This reverts commit 66b528078e4852412769375e35d2a672bf36a0ec.
This commit caused miscompilations, breaking tests in the libyuv
test suite; see
https://github.com/llvm/llvm-project/pull/86135#issuecomment-2041049709
for more details.
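For context on why narrowing these intrinsics is delicate and therefore easy to get wrong: `llvm.abs` carries an `is_int_min_poison` flag, and once an operand has been demoted to a narrower integer type, the signed minimum of the new type can become reachable for values that were legal at the wider type, so the flag must be dropped when the call is rebuilt. The reverted patch did this in `vectorizeTree` via `Builder.getFalse()`. The sketch below is a minimal, hypothetical illustration of that rule; the helper `emitNarrowedAbs` and its signature are not from the patch.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Hypothetical helper, not part of SLPVectorizer.cpp: rebuild an llvm.abs call
// after its operand has (possibly) been narrowed to a smaller integer type.
static Value *emitNarrowedAbs(IRBuilder<> &Builder, Value *Vec,
                              Value *OrigIntMinPoison, bool WasNarrowed) {
  // If the operand kept its original width, the original is_int_min_poison
  // argument can be reused. Once narrowed, INT_MIN of the smaller type may
  // occur, so the safe choice is is_int_min_poison = false (mirroring the
  // reverted patch's use of Builder.getFalse()).
  Value *IntMinPoison = WasNarrowed ? Builder.getFalse() : OrigIntMinPoison;
  return Builder.CreateBinaryIntrinsic(Intrinsic::abs, Vec, IntMinPoison);
}
```

The revert drops this special case together with the rest of the minbitwidth handling for these intrinsics, as the diff below shows.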
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  102
1 file changed, 14 insertions, 88 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 731d7b4..332877f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7056,16 +7056,19 @@ bool BoUpSLP::areAllUsersVectorized(
 static std::pair<InstructionCost, InstructionCost>
 getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
-                   TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
-                   ArrayRef<Type *> ArgTys) {
+                   TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
   // Calculate the cost of the scalar and vector calls.
+  SmallVector<Type *, 4> VecTys;
+  for (Use &Arg : CI->args())
+    VecTys.push_back(
+        FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
   FastMathFlags FMF;
   if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
     FMF = FPCI->getFastMathFlags();
   SmallVector<const Value *> Arguments(CI->args());
-  IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys, FMF,
+  IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
                                     dyn_cast<IntrinsicInst>(CI));
   auto IntrinsicCost =
       TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
@@ -7078,8 +7081,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
   if (!CI->isNoBuiltin() && VecFunc) {
     // Calculate the cost of the vector library call.
     // If the corresponding vector call is cheaper, return its cost.
-    LibCost =
-        TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
+    LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
+                                    TTI::TCK_RecipThroughput);
   }
   return {IntrinsicCost, LibCost};
 }
@@ -8505,30 +8508,6 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
   return TTI::CastContextHint::None;
 }
 
-/// Builds the arguments types vector for the given call instruction with the
-/// given \p ID for the specified vector factor.
-static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
-                                                  const Intrinsic::ID ID,
-                                                  const unsigned VF,
-                                                  unsigned MinBW) {
-  SmallVector<Type *> ArgTys;
-  for (auto [Idx, Arg] : enumerate(CI->args())) {
-    if (ID != Intrinsic::not_intrinsic) {
-      if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
-        ArgTys.push_back(Arg->getType());
-        continue;
-      }
-      if (MinBW > 0) {
-        ArgTys.push_back(FixedVectorType::get(
-            IntegerType::get(CI->getContext(), MinBW), VF));
-        continue;
-      }
-    }
-    ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
-  }
-  return ArgTys;
-}
-
 InstructionCost
 BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                       SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9095,11 +9074,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
     };
     auto GetVectorCost = [=](InstructionCost CommonCost) {
       auto *CI = cast<CallInst>(VL0);
-      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-      SmallVector<Type *> ArgTys =
-          buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
-                                 It != MinBWs.end() ? It->second.first : 0);
-      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
+      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
       return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -12571,10 +12546,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
-      SmallVector<Type *> ArgTys =
-          buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
-                                 It != MinBWs.end() ? It->second.first : 0);
-      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
+      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
       bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
                           VecCallCosts.first <= VecCallCosts.second;
 
@@ -12583,7 +12555,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       SmallVector<Type *, 2> TysForDecl;
       // Add return type if intrinsic is overloaded on it.
       if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
-        TysForDecl.push_back(VecTy);
+        TysForDecl.push_back(
+            FixedVectorType::get(CI->getType(), E->Scalars.size()));
       auto *CEI = cast<CallInst>(VL0);
       for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
         ValueList OpVL;
@@ -12591,12 +12564,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         // vectorized.
         if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
           ScalarArg = CEI->getArgOperand(I);
-          // if decided to reduce bitwidth of abs intrinsic, it second argument
-          // must be set false (do not return poison, if value issigned min).
-          if (ID == Intrinsic::abs && It != MinBWs.end() &&
-              It->second.first < DL->getTypeSizeInBits(CEI->getType()))
-            ScalarArg = Builder.getFalse();
-          OpVecs.push_back(ScalarArg);
+          OpVecs.push_back(CEI->getArgOperand(I));
           if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
             TysForDecl.push_back(ScalarArg->getType());
           continue;
@@ -12609,13 +12577,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         }
         ScalarArg = CEI->getArgOperand(I);
         if (cast<VectorType>(OpVec->getType())->getElementType() !=
-                ScalarArg->getType() &&
-            It == MinBWs.end()) {
+            ScalarArg->getType()) {
           auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
                                               VecTy->getNumElements());
           OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
-        } else if (It != MinBWs.end()) {
-          OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
         }
         LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
         OpVecs.push_back(OpVec);
@@ -14359,45 +14324,6 @@ bool BoUpSLP::collectValuesToDemote(
       return TryProcessInstruction(I, *ITE, BitWidth, Ops);
     }
-    case Instruction::Call: {
-      auto *IC = dyn_cast<IntrinsicInst>(I);
-      if (!IC)
-        break;
-      Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI);
-      if (ID != Intrinsic::abs && ID != Intrinsic::smin &&
-          ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax)
-        break;
-      SmallVector<Value *> Operands(1, I->getOperand(0));
-      End = 1;
-      if (ID != Intrinsic::abs) {
-        Operands.push_back(I->getOperand(1));
-        End = 2;
-      }
-      InstructionCost BestCost =
-          std::numeric_limits<InstructionCost::CostType>::max();
-      unsigned BestBitWidth = BitWidth;
-      unsigned VF = ITE->Scalars.size();
-      // Choose the best bitwidth based on cost estimations.
-      auto Checker = [&](unsigned BitWidth, unsigned) {
-        unsigned MinBW = PowerOf2Ceil(BitWidth);
-        SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
-        auto VecCallCosts = getVectorCallCosts(
-            IC,
-            FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF),
-            TTI, TLI, ArgTys);
-        InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
-        if (Cost < BestCost) {
-          BestCost = Cost;
-          BestBitWidth = BitWidth;
-        }
-        return false;
-      };
-      [[maybe_unused]] bool NeedToExit;
-      (void)AttemptCheckBitwidth(Checker, NeedToExit);
-      BitWidth = BestBitWidth;
-      return TryProcessInstruction(I, *ITE, BitWidth, Operands);
-    }
-
     // Otherwise, conservatively give up.
     default:
       break;