aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorMartin Storsjö <martin@martin.st>2024-04-06 18:17:39 +0300
committerMartin Storsjö <martin@martin.st>2024-04-06 23:53:26 +0300
commitbd9486b4ec7dc24f73f32474fa38b522a7cce085 (patch)
tree3b37d11866f14cdf7eb0b7d62e04c698b1280344 /llvm/lib
parentd38bff460acb4fe3156d90ec739da49344db14ca (diff)
downloadllvm-bd9486b4ec7dc24f73f32474fa38b522a7cce085.zip
llvm-bd9486b4ec7dc24f73f32474fa38b522a7cce085.tar.gz
llvm-bd9486b4ec7dc24f73f32474fa38b522a7cce085.tar.bz2
Revert "[SLP]Improve minbitwidth analysis for abs/smin/smax/umin/umax intrinsics."
This reverts commit 66b528078e4852412769375e35d2a672bf36a0ec. This commit caused miscompilations, breaking tests in the libyuv testsuite - see https://github.com/llvm/llvm-project/pull/86135#issuecomment-2041049709 for more details.
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp102
1 file changed, 14 insertions, 88 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 731d7b4..332877f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7056,16 +7056,19 @@ bool BoUpSLP::areAllUsersVectorized(
static std::pair<InstructionCost, InstructionCost>
getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
- TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
- ArrayRef<Type *> ArgTys) {
+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
+ SmallVector<Type *, 4> VecTys;
+ for (Use &Arg : CI->args())
+ VecTys.push_back(
+ FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
FastMathFlags FMF;
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
FMF = FPCI->getFastMathFlags();
SmallVector<const Value *> Arguments(CI->args());
- IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys, FMF,
+ IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
dyn_cast<IntrinsicInst>(CI));
auto IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
@@ -7078,8 +7081,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
if (!CI->isNoBuiltin() && VecFunc) {
// Calculate the cost of the vector library call.
// If the corresponding vector call is cheaper, return its cost.
- LibCost =
- TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
+ LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
+ TTI::TCK_RecipThroughput);
}
return {IntrinsicCost, LibCost};
}
@@ -8505,30 +8508,6 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
return TTI::CastContextHint::None;
}
-/// Builds the arguments types vector for the given call instruction with the
-/// given \p ID for the specified vector factor.
-static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
- const Intrinsic::ID ID,
- const unsigned VF,
- unsigned MinBW) {
- SmallVector<Type *> ArgTys;
- for (auto [Idx, Arg] : enumerate(CI->args())) {
- if (ID != Intrinsic::not_intrinsic) {
- if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
- ArgTys.push_back(Arg->getType());
- continue;
- }
- if (MinBW > 0) {
- ArgTys.push_back(FixedVectorType::get(
- IntegerType::get(CI->getContext(), MinBW), VF));
- continue;
- }
- }
- ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
- }
- return ArgTys;
-}
-
InstructionCost
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9095,11 +9074,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
auto *CI = cast<CallInst>(VL0);
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- SmallVector<Type *> ArgTys =
- buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
- It != MinBWs.end() ? It->second.first : 0);
- auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
+ auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -12571,10 +12546,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- SmallVector<Type *> ArgTys =
- buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
- It != MinBWs.end() ? It->second.first : 0);
- auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
+ auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
VecCallCosts.first <= VecCallCosts.second;
@@ -12583,7 +12555,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
- TysForDecl.push_back(VecTy);
+ TysForDecl.push_back(
+ FixedVectorType::get(CI->getType(), E->Scalars.size()));
auto *CEI = cast<CallInst>(VL0);
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
@@ -12591,12 +12564,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
// vectorized.
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
ScalarArg = CEI->getArgOperand(I);
- // if decided to reduce bitwidth of abs intrinsic, it second argument
- // must be set false (do not return poison, if value issigned min).
- if (ID == Intrinsic::abs && It != MinBWs.end() &&
- It->second.first < DL->getTypeSizeInBits(CEI->getType()))
- ScalarArg = Builder.getFalse();
- OpVecs.push_back(ScalarArg);
+ OpVecs.push_back(CEI->getArgOperand(I));
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
@@ -12609,13 +12577,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
ScalarArg = CEI->getArgOperand(I);
if (cast<VectorType>(OpVec->getType())->getElementType() !=
- ScalarArg->getType() &&
- It == MinBWs.end()) {
+ ScalarArg->getType()) {
auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
VecTy->getNumElements());
OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
- } else if (It != MinBWs.end()) {
- OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
}
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
@@ -14359,45 +14324,6 @@ bool BoUpSLP::collectValuesToDemote(
return TryProcessInstruction(I, *ITE, BitWidth, Ops);
}
- case Instruction::Call: {
- auto *IC = dyn_cast<IntrinsicInst>(I);
- if (!IC)
- break;
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI);
- if (ID != Intrinsic::abs && ID != Intrinsic::smin &&
- ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax)
- break;
- SmallVector<Value *> Operands(1, I->getOperand(0));
- End = 1;
- if (ID != Intrinsic::abs) {
- Operands.push_back(I->getOperand(1));
- End = 2;
- }
- InstructionCost BestCost =
- std::numeric_limits<InstructionCost::CostType>::max();
- unsigned BestBitWidth = BitWidth;
- unsigned VF = ITE->Scalars.size();
- // Choose the best bitwidth based on cost estimations.
- auto Checker = [&](unsigned BitWidth, unsigned) {
- unsigned MinBW = PowerOf2Ceil(BitWidth);
- SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
- auto VecCallCosts = getVectorCallCosts(
- IC,
- FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF),
- TTI, TLI, ArgTys);
- InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
- if (Cost < BestCost) {
- BestCost = Cost;
- BestBitWidth = BitWidth;
- }
- return false;
- };
- [[maybe_unused]] bool NeedToExit;
- (void)AttemptCheckBitwidth(Checker, NeedToExit);
- BitWidth = BestBitWidth;
- return TryProcessInstruction(I, *ITE, BitWidth, Operands);
- }
-
// Otherwise, conservatively give up.
default:
break;