Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  | 14
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp                    |  8
-rw-r--r--  llvm/lib/Transforms/Utils/InstructionNamer.cpp         |  7
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp        | 61
4 files changed, 71 insertions, 19 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8c8fc69..6b67b48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -544,8 +544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
Swapped ? OOp : C, "", &SI);
- if (isa<FPMathOperator>(&SI))
- cast<Instruction>(NewSel)->setFastMathFlags(FMF);
+ if (isa<FPMathOperator>(&SI)) {
+ FastMathFlags NewSelFMF = FMF;
+ // We cannot propagate ninf from the original select, because OOp may be
+ // inf and the flag only guarantees that FalseVal (op OOp) is never
+ // infinity.
+ // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN
+ // However, if the original select has both ninf and nnan, the result
+ // cannot be infinity, so we can safely propagate the flag.
+ NewSelFMF.setNoInfs(TVI->hasNoInfs() ||
+ (NewSelFMF.noInfs() && NewSelFMF.noNaNs()));
+ cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF);
+ }
NewSel->takeName(TVI);
BinaryOperator *BO =
BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 45d3d49..b9d332b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2961,6 +2961,7 @@ public:
isa<FixedVectorType>(NewAI.getAllocatedType())
? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
: Type::getInt8Ty(NewAI.getContext());
+ unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
// Helper to check if a type is
// 1. A fixed vector type
@@ -2991,10 +2992,17 @@ public:
// Do not handle the case if
// 1. The store does not meet the conditions in the helper function
// 2. The store is volatile
+ // 3. The total store size is not a multiple of the allocated element
+ // type size
if (!IsTypeValidForTreeStructuredMerge(
SI->getValueOperand()->getType()) ||
SI->isVolatile())
return std::nullopt;
+ auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ if (NumElts * EltSize % AllocatedEltTySize != 0)
+ return std::nullopt;
StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
SI->getValueOperand());
} else {
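
The new bail-out in this hunk is pure integer arithmetic, so it can be illustrated standalone. A hedged sketch; the free function and plain-unsigned interface are assumptions for illustration, not SROA API:

// Reject the tree-structured merge when the vector store's total bit width
// is not a multiple of the alloca's element bit width, i.e. when a store
// would straddle element boundaries.
static bool storeCoversWholeElements(unsigned NumElts, unsigned EltSizeBits,
                                     unsigned AllocatedEltSizeBits) {
  return (NumElts * EltSizeBits) % AllocatedEltSizeBits == 0;
}
// Example: a <3 x i16> store is 48 bits; into an alloca with i32 elements
// (32 bits) it is rejected, since 48 % 32 == 16 != 0.
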
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 3ae570c..4f1ff7b 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-void nameInstructions(Function &F) {
- for (auto &Arg : F.args()) {
+static void nameInstructions(Function &F) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasName())
Arg.setName("arg");
}
@@ -38,8 +37,6 @@ void nameInstructions(Function &F) {
}
}
-} // namespace
-
PreservedAnalyses InstructionNamerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
nameInstructions(F);
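
Behaviorally this is a no-op: a function in an anonymous namespace and a file-local static function both have internal linkage; the patch simply prefers the `static` spelling for a single helper. An illustrative sketch of the equivalence (names made up):

namespace {
void helperA(); // internal linkage via the anonymous namespace
} // namespace

static void helperB(); // the same internal linkage, spelled `static`

If memory serves, the pass is registered as `instnamer`, so its effect can be observed with `opt -passes=instnamer -S`, which gives unnamed arguments the name `arg` (plus the basic-block and instruction naming in the unchanged remainder of the function).
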
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cfa8d27..2388375 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2245,6 +2245,26 @@ public:
Align Alignment, const int64_t Diff, Value *Ptr0,
Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with run-time stride).
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ScalarTy type of loads.
+ /// \param CommonAlignment common alignment of loads as computed by
+ /// `computeCommonAlignment<LoadInst>`.
+ /// \param SortedIndices is a list of indices computed by this function such
+ /// that the sequence `PointerOps[SortedIndices[0]],
+ /// PointerOps[SortedIndices[1]], ..., PointerOps[SortedIndices[n]]` is
+ /// ordered by the coefficient of the stride. For example, if PointerOps is
+ /// `%base + %stride, %base, %base + 2 * %stride`, `SortedIndices` will be
+ /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` would be
+ /// `0, 1, 2, 3, ...`, we return an empty vector instead.
+ /// \param SPtrInfo If the function returns `true`, it also sets all the
+ /// fields of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const;
+
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
/// \param VL list of loads.
@@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
}
+bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
+ Type *ScalarTy, Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const {
+ const unsigned Sz = PointerOps.size();
+ FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
+ if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
+ !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+ return false;
+ if (const SCEV *Stride =
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
+ SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
+ SPtrInfo.StrideSCEV = Stride;
+ return true;
+ }
+ return false;
+}
+
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo,
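
To make the `SortedIndices` convention from the new documentation concrete, here is a standalone, compilable sketch with made-up values (not SLPVectorizer code):

#include <cstdio>
#include <vector>

int main() {
  // Pointers conceptually at: base + 1*stride, base + 0*stride,
  // base + 2*stride, i.e. stride coefficients {1, 0, 2}.
  std::vector<int> StrideCoeffs = {1, 0, 2};
  // The permutation ordering them by coefficient is [1, 0, 2]: element 1
  // (coefficient 0) first, element 0 (coefficient 1), then element 2.
  std::vector<unsigned> SortedIndices = {1, 0, 2};
  for (unsigned Idx : SortedIndices)
    std::printf("coefficient %d\n", StrideCoeffs[Idx]);
  // Had the coefficients already been 0, 1, 2, ..., SortedIndices would be
  // left empty by convention.
  return 0;
}
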
@@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
auto *VecTy = getWidenedType(ScalarTy, Sz);
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (!IsSorted) {
- if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
- if (const SCEV *Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order);
- Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
- SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
- SPtrInfo.StrideSCEV = Stride;
- return LoadsState::StridedVectorize;
- }
- }
+ if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order,
+ SPtrInfo))
+ return LoadsState::StridedVectorize;
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment))
@@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis {
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
BasicBlock *Parent = nullptr;
// Checks if the instruction has a supported opcode.
- auto IsSupportedInstruction = [&](Instruction *I) {
+ auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) {
+ if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+ return false;
return I && isSupportedOpcode(I->getOpcode()) &&
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
};
@@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis {
// will be unable to schedule anyway.
SmallDenseSet<Value *, 8> Operands;
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
+ bool AnyUndef = false;
for (Value *V : VL) {
auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!I) {
+ AnyUndef |= isa<UndefValue>(V);
continue;
+ }
if (!DT.isReachableFromEntry(I->getParent()))
continue;
if (Candidates.empty()) {
@@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis {
if (P.second.size() < BestOpcodeNum)
continue;
for (Instruction *I : P.second) {
- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
+ if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
MainOp = I;
BestOpcodeNum = P.second.size();
break;
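
The rationale for the new `AnyUndef` guard: when some lanes of the bundle are undef, the chosen main opcode is eventually executed for those lanes with arbitrary operand values. That is harmless for opcodes such as add or mul, which are defined for all inputs at the IR level, but not for division/remainder (the arbitrary value may be zero) or calls (which may have side effects). A scalar analogy, purely illustrative and not LLVM code:

// Filling an undef lane with an arbitrary value is safe for addition...
int addLane(int X, int AnyValue) { return X + AnyValue; }
// ...but not for division: undefined behavior if AnyValue happens to be 0.
int divLane(int X, int AnyValue) { return X / AnyValue; }
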