diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 21 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 62 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/GVN.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/SROA.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/InstructionNamer.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 61 |
11 files changed, 136 insertions, 57 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 59e103cd..73ec451 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // zext(bool) + C -> bool ? C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1); + return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C), + Op1); // sext(bool) + C -> bool ? C - 1 : C if (match(Op0, m_SExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1); + return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C), + Op1); // ~X + C --> (C-1) - X if (match(Op0, m_Not(m_Value(X)))) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 218aaf9..7071876 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -471,18 +471,19 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); - SelectInst *createSelectInst(Value *C, Value *S1, Value *S2, - const Twine &NameStr = "", - InsertPosition InsertBefore = nullptr, - Instruction *MDFrom = nullptr) { - SelectInst *SI = - SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom); - if (!MDFrom) - setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE); - return SI; +public: + /// Create `select C, S1, S2`. Use only when the profile cannot be calculated + /// from existing profile metadata: if the Function has profiles, this will + /// set the profile of this select to "unknown". + SelectInst * + createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2, + const Twine &NameStr = "", + InsertPosition InsertBefore = nullptr) { + auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr); + setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE); + return Sel; } -public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. void CreateNonTerminatorUnreachable(Instruction *InsertAt) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8c8fc69..6b67b48 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -544,8 +544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp, Swapped ? OOp : C, "", &SI); - if (isa<FPMathOperator>(&SI)) - cast<Instruction>(NewSel)->setFastMathFlags(FMF); + if (isa<FPMathOperator>(&SI)) { + FastMathFlags NewSelFMF = FMF; + // We cannot propagate ninf from the original select, because OOp may be + // inf and the flag only guarantees that FalseVal (op OOp) is never + // infinity. + // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN + // Specifically, if the original select has both ninf and nnan, we can + // safely propagate the flag. + NewSelFMF.setNoInfs(TVI->hasNoInfs() || + (NewSelFMF.noInfs() && NewSelFMF.noNaNs())); + cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF); + } NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index d457e0c..899a3c1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1); - return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty)); + return createSelectInstWithUnknownProfile(X, NewC, + ConstantInt::getNullValue(Ty)); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 127a506..63e24a0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -107,7 +108,10 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I, Value *ShrAmtZ = IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()), ShrAmt->getName() + ".z"); - Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper); + // There is no existing !prof metadata we can derive the !prof metadata for + // this select. + Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper); + IC.Builder.Insert(Select); Select->takeName(I); return Select; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d56a1af..82ac903 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { Constant *Zero = ConstantInt::getNullValue(BO.getType()); Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); - return createSelectInst(X, TVal, FVal); + return createSelectInstWithUnknownProfile(X, TVal, FVal); } static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e5935f4..ff5f390 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -386,22 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } -/// Helper to get the successor corresponding to a particular case value for -/// a switch statement. -static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, - const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; -} - namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with @@ -835,19 +819,32 @@ private: TPaths = std::move(TempList); } + /// Fast helper to get the successor corresponding to a particular case value + /// for a switch statement. + BasicBlock *getNextCaseSuccessor(const APInt &NextState) { + // Precompute the value => successor mapping + if (CaseValToDest.empty()) { + for (auto Case : Switch->cases()) { + APInt CaseVal = Case.getCaseValue()->getValue(); + CaseValToDest[CaseVal] = Case.getCaseSuccessor(); + } + } + + auto SuccIt = CaseValToDest.find(NextState); + return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest() + : SuccIt->second; + } + // Two states are equivalent if they have the same switch destination. // Unify the states in different threading path if the states are equivalent. void unifyTPaths() { - llvm::SmallDenseMap<BasicBlock *, APInt> DestToState; + SmallDenseMap<BasicBlock *, APInt> DestToState; for (ThreadingPath &Path : TPaths) { APInt NextState = Path.getExitValue(); - BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState); - auto StateIt = DestToState.find(Dest); - if (StateIt == DestToState.end()) { - DestToState.insert({Dest, NextState}); + BasicBlock *Dest = getNextCaseSuccessor(NextState); + auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState); + if (Inserted) continue; - } - if (NextState != StateIt->second) { LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " << StateIt->second << "\n"); @@ -861,6 +858,7 @@ private: BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; std::vector<ThreadingPath> TPaths; + DenseMap<APInt, BasicBlock *> CaseValToDest; LoopInfo *LI; Loop *SwitchOuterLoop; }; @@ -1162,6 +1160,24 @@ private: SSAUpdate.RewriteAllUses(&DTU->getDomTree()); } + /// Helper to get the successor corresponding to a particular case value for + /// a switch statement. + /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch) + /// by updating cached value => successor mapping during threading. + static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; + } + /// Clones a basic block, and adds it to the CFG. /// /// This function also includes updating phi nodes in the successors of the diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 3a8ade8..42db424 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2156,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) { if (!L->isUnordered()) return false; + if (L->getType()->isTokenLikeTy()) + return false; + if (L->use_empty()) { salvageAndRemoveInstruction(L); return true; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 45d3d49..b9d332b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2961,6 +2961,7 @@ public: isa<FixedVectorType>(NewAI.getAllocatedType()) ? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType() : Type::getInt8Ty(NewAI.getContext()); + unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy); // Helper to check if a type is // 1. A fixed vector type @@ -2991,10 +2992,17 @@ public: // Do not handle the case if // 1. The store does not meet the conditions in the helper function // 2. The store is volatile + // 3. The total store size is not a multiple of the allocated element + // type size if (!IsTypeValidForTreeStructuredMerge( SI->getValueOperand()->getType()) || SI->isVolatile()) return std::nullopt; + auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType()); + unsigned NumElts = VecTy->getNumElements(); + unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType()); + if (NumElts * EltSize % AllocatedEltTySize != 0) + return std::nullopt; StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(), SI->getValueOperand()); } else { diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 3ae570c..4f1ff7b 100644 --- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -void nameInstructions(Function &F) { - for (auto &Arg : F.args()) { +static void nameInstructions(Function &F) { + for (Argument &Arg : F.args()) { if (!Arg.hasName()) Arg.setName("arg"); } @@ -38,8 +37,6 @@ void nameInstructions(Function &F) { } } -} // namespace - PreservedAnalyses InstructionNamerPass::run(Function &F, FunctionAnalysisManager &FAM) { nameInstructions(F); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cfa8d27..2388375 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2245,6 +2245,26 @@ public: Align Alignment, const int64_t Diff, Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const; + /// Return true if an array of scalar loads can be replaced with a strided + /// load (with run-time stride). + /// \param PointerOps list of pointer arguments of loads. + /// \param ScalarTy type of loads. + /// \param CommonAlignment common alignement of loads as computed by + /// `computeCommonAlignment<LoadInst>`. + /// \param SortedIndicies is a list of indicies computed by this function such + /// that the sequence `PointerOps[SortedIndices[0]], + /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is + /// ordered by the coefficient of the stride. For example, if PointerOps is + /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be + /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be + /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`. + /// \param SPtrInfo If the function return `true`, it also sets all the fields + /// of `SPtrInfo` necessary to generate the strided load later. + bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy, + Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const; + /// Checks if the given array of loads can be represented as a vectorized, /// scatter or just simple gather. /// \param VL list of loads. @@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, return false; } +bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, + Type *ScalarTy, Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const { + const unsigned Sz = PointerOps.size(); + FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz); + if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) || + !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)) + return false; + if (const SCEV *Stride = + calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) { + SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); + SPtrInfo.StrideSCEV = Stride; + return true; + } + return false; +} + BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order, SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo, @@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( auto *VecTy = getWidenedType(ScalarTy, Sz); Align CommonAlignment = computeCommonAlignment<LoadInst>(VL); if (!IsSorted) { - if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) { - if (const SCEV *Stride = - calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order); - Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) { - SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); - SPtrInfo.StrideSCEV = Stride; - return LoadsState::StridedVectorize; - } - } + if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order, + SPtrInfo)) + return LoadsState::StridedVectorize; if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) || TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) @@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis { void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) { BasicBlock *Parent = nullptr; // Checks if the instruction has supported opcode. - auto IsSupportedInstruction = [&](Instruction *I) { + auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) { + if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I))) + return false; return I && isSupportedOpcode(I->getOpcode()) && (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I)); }; @@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis { // will be unable to schedule anyway. SmallDenseSet<Value *, 8> Operands; SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates; + bool AnyUndef = false; for (Value *V : VL) { auto *I = dyn_cast<Instruction>(V); - if (!I) + if (!I) { + AnyUndef |= isa<UndefValue>(V); continue; + } if (!DT.isReachableFromEntry(I->getParent())) continue; if (Candidates.empty()) { @@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis { if (P.second.size() < BestOpcodeNum) continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { MainOp = I; BestOpcodeNum = P.second.size(); break; |