diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/SROA.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/InstructionNamer.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 61 |
4 files changed, 71 insertions, 19 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8c8fc69..6b67b48 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -544,8 +544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp, Swapped ? OOp : C, "", &SI); - if (isa<FPMathOperator>(&SI)) - cast<Instruction>(NewSel)->setFastMathFlags(FMF); + if (isa<FPMathOperator>(&SI)) { + FastMathFlags NewSelFMF = FMF; + // We cannot propagate ninf from the original select, because OOp may be + // inf and the flag only guarantees that FalseVal (op OOp) is never + // infinity. + // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN + // Specifically, if the original select has both ninf and nnan, we can + // safely propagate the flag. + NewSelFMF.setNoInfs(TVI->hasNoInfs() || + (NewSelFMF.noInfs() && NewSelFMF.noNaNs())); + cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF); + } NewSel->takeName(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 45d3d49..b9d332b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2961,6 +2961,7 @@ public: isa<FixedVectorType>(NewAI.getAllocatedType()) ? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType() : Type::getInt8Ty(NewAI.getContext()); + unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy); // Helper to check if a type is // 1. A fixed vector type @@ -2991,10 +2992,17 @@ public: // Do not handle the case if // 1. The store does not meet the conditions in the helper function // 2. The store is volatile + // 3. The total store size is not a multiple of the allocated element + // type size if (!IsTypeValidForTreeStructuredMerge( SI->getValueOperand()->getType()) || SI->isVolatile()) return std::nullopt; + auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType()); + unsigned NumElts = VecTy->getNumElements(); + unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType()); + if (NumElts * EltSize % AllocatedEltTySize != 0) + return std::nullopt; StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(), SI->getValueOperand()); } else { diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 3ae570c..4f1ff7b 100644 --- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -void nameInstructions(Function &F) { - for (auto &Arg : F.args()) { +static void nameInstructions(Function &F) { + for (Argument &Arg : F.args()) { if (!Arg.hasName()) Arg.setName("arg"); } @@ -38,8 +37,6 @@ void nameInstructions(Function &F) { } } -} // namespace - PreservedAnalyses InstructionNamerPass::run(Function &F, FunctionAnalysisManager &FAM) { nameInstructions(F); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cfa8d27..2388375 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2245,6 +2245,26 @@ public: Align Alignment, const int64_t Diff, Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const; + /// Return true if an array of scalar loads can be replaced with a strided + /// load (with run-time stride). + /// \param PointerOps list of pointer arguments of loads. + /// \param ScalarTy type of loads. + /// \param CommonAlignment common alignement of loads as computed by + /// `computeCommonAlignment<LoadInst>`. + /// \param SortedIndicies is a list of indicies computed by this function such + /// that the sequence `PointerOps[SortedIndices[0]], + /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is + /// ordered by the coefficient of the stride. For example, if PointerOps is + /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be + /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be + /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`. + /// \param SPtrInfo If the function return `true`, it also sets all the fields + /// of `SPtrInfo` necessary to generate the strided load later. + bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy, + Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const; + /// Checks if the given array of loads can be represented as a vectorized, /// scatter or just simple gather. /// \param VL list of loads. @@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy, return false; } +bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, + Type *ScalarTy, Align CommonAlignment, + SmallVectorImpl<unsigned> &SortedIndices, + StridedPtrInfo &SPtrInfo) const { + const unsigned Sz = PointerOps.size(); + FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz); + if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) || + !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)) + return false; + if (const SCEV *Stride = + calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) { + SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); + SPtrInfo.StrideSCEV = Stride; + return true; + } + return false; +} + BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order, SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo, @@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( auto *VecTy = getWidenedType(ScalarTy, Sz); Align CommonAlignment = computeCommonAlignment<LoadInst>(VL); if (!IsSorted) { - if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) { - if (const SCEV *Stride = - calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order); - Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) { - SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size()); - SPtrInfo.StrideSCEV = Stride; - return LoadsState::StridedVectorize; - } - } + if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order, + SPtrInfo)) + return LoadsState::StridedVectorize; if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) || TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) @@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis { void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) { BasicBlock *Parent = nullptr; // Checks if the instruction has supported opcode. - auto IsSupportedInstruction = [&](Instruction *I) { + auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) { + if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I))) + return false; return I && isSupportedOpcode(I->getOpcode()) && (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I)); }; @@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis { // will be unable to schedule anyway. SmallDenseSet<Value *, 8> Operands; SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates; + bool AnyUndef = false; for (Value *V : VL) { auto *I = dyn_cast<Instruction>(V); - if (!I) + if (!I) { + AnyUndef |= isa<UndefValue>(V); continue; + } if (!DT.isReachableFromEntry(I->getParent())) continue; if (Candidates.empty()) { @@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis { if (P.second.size() < BestOpcodeNum) continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { MainOp = I; BestOpcodeNum = P.second.size(); break; |