diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/InjectTLIMappings.cpp | 46 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 11 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 29 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 |
7 files changed, 100 insertions, 56 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 529f730..89a1ad2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2953,6 +2953,9 @@ void coro::salvageDebugInfo( std::optional<BasicBlock::iterator> InsertPt; if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. if (!OptimizeFrame && I->getDebugLoc()) DVI.setDebugLoc(I->getDebugLoc()); } else if (isa<Argument>(Storage)) @@ -2988,9 +2991,14 @@ void coro::salvageDebugInfo( // dbg.declare does. if (DPV.getType() == DPValue::LocationType::Declare) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. + if (!OptimizeFrame && I->getDebugLoc()) + DPV.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) { DPV.removeFromParent(); diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 49ac1e9..cc93c86 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -366,6 +366,13 @@ struct Decomposition { append_range(Vars, Other.Vars); } + void sub(const Decomposition &Other) { + Decomposition Tmp = Other; + Tmp.mul(-1); + add(Tmp.Offset); + append_range(Vars, Tmp.Vars); + } + void mul(int64_t Factor) { Offset = multiplyWithOverflow(Offset, Factor); for (auto &Var : Vars) @@ -569,10 +576,12 @@ static Decomposition decompose(Value *V, return Result; } - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) - return {-1 * CI->getSExtValue(), {{1, Op0}}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {0, {{1, Op0}, {-1, Op1}}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) { + auto ResA = decompose(Op0, Preconditions, IsSigned, DL); + auto ResB = decompose(Op1, Preconditions, IsSigned, DL); + ResA.sub(ResB); + return ResA; + } return {V, IsKnownNonNegative}; } @@ -635,7 +644,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, // First try to look up \p V in Value2Index and NewVariables. Otherwise add a // new entry to NewVariables. - DenseMap<Value *, unsigned> NewIndexMap; + SmallDenseMap<Value *, unsigned> NewIndexMap; auto GetOrAddIndex = [&Value2Index, &NewVariables, &NewIndexMap](Value *V) -> unsigned { auto V2I = Value2Index.find(V); @@ -659,7 +668,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, IsSigned, IsEq, IsNe); // Collect variables that are known to be positive in all uses in the // constraint. - DenseMap<Value *, bool> KnownNonNegativeVariables; + SmallDenseMap<Value *, bool> KnownNonNegativeVariables; auto &R = Res.Coefficients; for (const auto &KV : VariablesA) { R[GetOrAddIndex(KV.Variable)] += KV.Coefficient; diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 0990c75..ea31356 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -33,37 +33,37 @@ STATISTIC(NumVFDeclAdded, STATISTIC(NumCompUsedAdded, "Number of `@llvm.compiler.used` operands that have been added."); -/// A helper function that adds the vector function declaration that -/// vectorizes the CallInst CI with a vectorization factor of VF -/// lanes. The TLI assumes that all parameters and the return type of -/// CI (other than void) need to be widened to a VectorType of VF -/// lanes. +/// A helper function that adds the vector variant declaration for vectorizing +/// the CallInst \p CI with a vectorization factor of \p VF lanes. For each +/// mapping, TLI provides a VABI prefix, which contains all information required +/// to create vector function declaration. static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, - bool Predicate, const StringRef VFName) { + const VecDesc *VD) { Module *M = CI.getModule(); + FunctionType *ScalarFTy = CI.getFunctionType(); - // Add function declaration. - Type *RetTy = ToVectorTy(CI.getType(), VF); - SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI.args()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); - assert(!CI.getFunctionType()->isVarArg() && - "VarArg functions are not supported."); - if (Predicate) - Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF)); - FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false); - Function *VectorF = - Function::Create(FTy, Function::ExternalLinkage, VFName, M); - VectorF->copyAttributesFrom(CI.getCalledFunction()); + assert(!ScalarFTy->isVarArg() && "VarArg functions are not supported."); + + const std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI( + VD->getVectorFunctionABIVariantString(), ScalarFTy); + + assert(Info && "Failed to demangle vector variant"); + assert(Info->Shape.VF == VF && "Mangled name does not match VF"); + + const StringRef VFName = VD->getVectorFnName(); + FunctionType *VectorFTy = VFABI::createFunctionType(*Info, ScalarFTy); + Function *VecFunc = + Function::Create(VectorFTy, Function::ExternalLinkage, VFName, M); + VecFunc->copyAttributesFrom(CI.getCalledFunction()); ++NumVFDeclAdded; LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName - << "` of type " << *(VectorF->getType()) << "\n"); + << "` of type " << *VectorFTy << "\n"); // Make function declaration (without a body) "sticky" in the IR by // listing it in the @llvm.compiler.used intrinsic. - assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` " + assert(!VecFunc->size() && "VFABI attribute requires `@llvm.compiler.used` " "only on declarations."); - appendToCompilerUsed(*M, {VectorF}); + appendToCompilerUsed(*M, {VecFunc}); LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName << "` to `@llvm.compiler.used`.\n"); ++NumCompUsedAdded; @@ -100,7 +100,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { } Function *VariantF = M->getFunction(VD->getVectorFnName()); if (!VariantF) - addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName()); + addVariantDeclaration(CI, VF, VD); } }; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 55e3756..61d891d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { } static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { + DomTreeUpdater *DTU, + bool RemoveOrigDefaultBlock = true) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); auto *BB = Switch->getParent(); auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); + if (RemoveOrigDefaultBlock) + OrigDefaultBlock->removePredecessor(BB); BasicBlock *NewDefaultBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), OrigDefaultBlock); @@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, if (DTU) { SmallVector<DominatorTree::UpdateType, 2> Updates; Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) + if (RemoveOrigDefaultBlock && + !is_contained(successors(BB), OrigDefaultBlock)) Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); DTU->applyUpdates(Updates); } @@ -5609,10 +5612,28 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, Known.getBitWidth() - (Known.Zero | Known.One).popcount(); assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */ && - SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI, DTU); - return true; + NumUnknownBits < 64 /* avoid overflow */) { + uint64_t AllNumCases = 1ULL << NumUnknownBits; + if (SI->getNumCases() == AllNumCases) { + createUnreachableSwitchDefault(SI, DTU); + return true; + } + // When only one case value is missing, replace default with that case. + // Eliminating the default branch will provide more opportunities for + // optimization, such as lookup tables. + if (SI->getNumCases() == AllNumCases - 1) { + assert(NumUnknownBits > 1 && "Should be canonicalized to a branch"); + uint64_t MissingCaseVal = 0; + for (const auto &Case : SI->cases()) + MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue(); + auto *MissingCase = + cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal)); + SwitchInstProfUpdateWrapper SIW(*SI); + SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0)); + createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false); + SIW.setSuccessorWeight(0, 0); + return true; + } } if (DeadCases.empty()) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 577ce80..150ab30 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -167,9 +167,14 @@ public: } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL, const Twine &Name = "") { - return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, - Name); + DebugLoc DL, const Twine &Name = "", + std::optional<FastMathFlags> FMFs = std::nullopt) { + auto *Select = + FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + *FMFs, DL, Name) + : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + DL, Name); + return tryInsertInstruction(Select); } /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8e135d8..f5f0461 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9141,7 +9141,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); + auto *NewExitingVPV = PhiR->getBackedgeValue(); // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9151,21 +9151,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( VPValue *Red = PhiR->getBackedgeValue(); assert(Red->getDefiningRecipe()->getParent() != LatchVPBB && "reduction recipe must be defined before latch"); - FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - Result = + std::optional<FastMathFlags> FMFs = PhiTy->isFloatingPointTy() - ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) - : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Result->insertBefore(&*Builder.getInsertPoint()); - Red->replaceUsesWithIf( - Result->getVPSingleValue(), - [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); + ? std::make_optional(RdxDesc.getFastMathFlags()) + : std::nullopt; + NewExitingVPV = Builder.createSelect(Cond, Red, PhiR, {}, "", FMFs); + Red->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) { + return isa<VPLiveOut>(&U); + }); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Result->getVPSingleValue()); + PhiR->setOperand(1, NewExitingVPV); } // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the @@ -9174,17 +9173,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, - Result->getVPSingleValue(), RdxTy); + auto *Trunc = + new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); auto *Extnd = RdxDesc.isSigned() ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); - Trunc->insertAfter(Result); + Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); Extnd->insertAfter(Trunc); - Result->getVPSingleValue()->replaceAllUsesWith(Extnd); - Trunc->setOperand(0, Result->getVPSingleValue()); + NewExitingVPV->replaceAllUsesWith(Extnd); + Trunc->setOperand(0, NewExitingVPV); } } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 30499152..bd89ec0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11139,6 +11139,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { case Instruction::ExtractElement: { Value *V = E->getSingleOperand(0); + if (const TreeEntry *TE = getTreeEntry(V)) + V = TE->VectorizedValue; setInsertPointAfterBundle(E); V = FinalShuffle(V, E, VecTy, IsSigned); E->VectorizedValue = V; |