aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h21
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp14
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp62
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/InstructionNamer.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp61
11 files changed, 136 insertions, 57 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 59e103cd..73ec451 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
+ return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C),
+ Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
+ return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C),
+ Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 218aaf9..7071876 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -471,18 +471,19 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
- SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
- const Twine &NameStr = "",
- InsertPosition InsertBefore = nullptr,
- Instruction *MDFrom = nullptr) {
- SelectInst *SI =
- SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
- if (!MDFrom)
- setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
- return SI;
+public:
+ /// Create `select C, S1, S2`. Use only when the profile cannot be calculated
+ /// from existing profile metadata: if the Function has profiles, this will
+ /// set the profile of this select to "unknown".
+ SelectInst *
+ createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ InsertPosition InsertBefore = nullptr) {
+ auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE);
+ return Sel;
}
-public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8c8fc69..6b67b48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -544,8 +544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
Swapped ? OOp : C, "", &SI);
- if (isa<FPMathOperator>(&SI))
- cast<Instruction>(NewSel)->setFastMathFlags(FMF);
+ if (isa<FPMathOperator>(&SI)) {
+ FastMathFlags NewSelFMF = FMF;
+ // We cannot propagate ninf from the original select, because OOp may be
+ // inf and the flag only guarantees that FalseVal (op OOp) is never
+ // infinity.
+ // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN
+ // Specifically, if the original select has both ninf and nnan, we can
+ // safely propagate the flag.
+ NewSelFMF.setNoInfs(TVI->hasNoInfs() ||
+ (NewSelFMF.noInfs() && NewSelFMF.noNaNs()));
+ cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF);
+ }
NewSel->takeName(TVI);
BinaryOperator *BO =
BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index d457e0c..899a3c1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
- return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
+ return createSelectInstWithUnknownProfile(X, NewC,
+ ConstantInt::getNullValue(Ty));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 127a506..63e24a0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -107,7 +108,10 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
Value *ShrAmtZ =
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
ShrAmt->getName() + ".z");
- Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
+ // There is no existing !prof metadata we can derive the !prof metadata for
+ // this select.
+ Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper);
+ IC.Builder.Insert(Select);
Select->takeName(I);
return Select;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d56a1af..82ac903 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
Constant *Zero = ConstantInt::getNullValue(BO.getType());
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
- return createSelectInst(X, TVal, FVal);
+ return createSelectInstWithUnknownProfile(X, TVal, FVal);
}
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index e5935f4..ff5f390 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -386,22 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
return OS;
}
-/// Helper to get the successor corresponding to a particular case value for
-/// a switch statement.
-static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
- const APInt &NextState) {
- BasicBlock *NextCase = nullptr;
- for (auto Case : Switch->cases()) {
- if (Case.getCaseValue()->getValue() == NextState) {
- NextCase = Case.getCaseSuccessor();
- break;
- }
- }
- if (!NextCase)
- NextCase = Switch->getDefaultDest();
- return NextCase;
-}
-
namespace {
/// ThreadingPath is a path in the control flow of a loop that can be threaded
/// by cloning necessary basic blocks and replacing conditional branches with
@@ -835,19 +819,32 @@ private:
TPaths = std::move(TempList);
}
+ /// Fast helper to get the successor corresponding to a particular case value
+ /// for a switch statement.
+ BasicBlock *getNextCaseSuccessor(const APInt &NextState) {
+ // Precompute the value => successor mapping
+ if (CaseValToDest.empty()) {
+ for (auto Case : Switch->cases()) {
+ APInt CaseVal = Case.getCaseValue()->getValue();
+ CaseValToDest[CaseVal] = Case.getCaseSuccessor();
+ }
+ }
+
+ auto SuccIt = CaseValToDest.find(NextState);
+ return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest()
+ : SuccIt->second;
+ }
+
// Two states are equivalent if they have the same switch destination.
// Unify the states in different threading path if the states are equivalent.
void unifyTPaths() {
- llvm::SmallDenseMap<BasicBlock *, APInt> DestToState;
+ SmallDenseMap<BasicBlock *, APInt> DestToState;
for (ThreadingPath &Path : TPaths) {
APInt NextState = Path.getExitValue();
- BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState);
- auto StateIt = DestToState.find(Dest);
- if (StateIt == DestToState.end()) {
- DestToState.insert({Dest, NextState});
+ BasicBlock *Dest = getNextCaseSuccessor(NextState);
+ auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState);
+ if (Inserted)
continue;
- }
-
if (NextState != StateIt->second) {
LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to "
<< StateIt->second << "\n");
@@ -861,6 +858,7 @@ private:
BasicBlock *SwitchBlock;
OptimizationRemarkEmitter *ORE;
std::vector<ThreadingPath> TPaths;
+ DenseMap<APInt, BasicBlock *> CaseValToDest;
LoopInfo *LI;
Loop *SwitchOuterLoop;
};
@@ -1162,6 +1160,24 @@ private:
SSAUpdate.RewriteAllUses(&DTU->getDomTree());
}
+ /// Helper to get the successor corresponding to a particular case value for
+ /// a switch statement.
+ /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch)
+ /// by updating cached value => successor mapping during threading.
+ static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
+ const APInt &NextState) {
+ BasicBlock *NextCase = nullptr;
+ for (auto Case : Switch->cases()) {
+ if (Case.getCaseValue()->getValue() == NextState) {
+ NextCase = Case.getCaseSuccessor();
+ break;
+ }
+ }
+ if (!NextCase)
+ NextCase = Switch->getDefaultDest();
+ return NextCase;
+ }
+
/// Clones a basic block, and adds it to the CFG.
///
/// This function also includes updating phi nodes in the successors of the
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 3a8ade8..42db424 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2156,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) {
if (!L->isUnordered())
return false;
+ if (L->getType()->isTokenLikeTy())
+ return false;
+
if (L->use_empty()) {
salvageAndRemoveInstruction(L);
return true;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 45d3d49..b9d332b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2961,6 +2961,7 @@ public:
isa<FixedVectorType>(NewAI.getAllocatedType())
? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
: Type::getInt8Ty(NewAI.getContext());
+ unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
// Helper to check if a type is
// 1. A fixed vector type
@@ -2991,10 +2992,17 @@ public:
// Do not handle the case if
// 1. The store does not meet the conditions in the helper function
// 2. The store is volatile
+ // 3. The total store size is not a multiple of the allocated element
+ // type size
if (!IsTypeValidForTreeStructuredMerge(
SI->getValueOperand()->getType()) ||
SI->isVolatile())
return std::nullopt;
+ auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ if (NumElts * EltSize % AllocatedEltTySize != 0)
+ return std::nullopt;
StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
SI->getValueOperand());
} else {
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 3ae570c..4f1ff7b 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-void nameInstructions(Function &F) {
- for (auto &Arg : F.args()) {
+static void nameInstructions(Function &F) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasName())
Arg.setName("arg");
}
@@ -38,8 +37,6 @@ void nameInstructions(Function &F) {
}
}
-} // namespace
-
PreservedAnalyses InstructionNamerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
nameInstructions(F);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cfa8d27..2388375 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2245,6 +2245,26 @@ public:
Align Alignment, const int64_t Diff, Value *Ptr0,
Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with run-time stride).
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ScalarTy type of loads.
+ /// \param CommonAlignment common alignement of loads as computed by
+ /// `computeCommonAlignment<LoadInst>`.
+ /// \param SortedIndicies is a list of indicies computed by this function such
+ /// that the sequence `PointerOps[SortedIndices[0]],
+ /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is
+ /// ordered by the coefficient of the stride. For example, if PointerOps is
+ /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be
+ /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be
+ /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`.
+ /// \param SPtrInfo If the function return `true`, it also sets all the fields
+ /// of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const;
+
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
/// \param VL list of loads.
@@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
}
+bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
+ Type *ScalarTy, Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const {
+ const unsigned Sz = PointerOps.size();
+ FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
+ if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
+ !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+ return false;
+ if (const SCEV *Stride =
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
+ SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
+ SPtrInfo.StrideSCEV = Stride;
+ return true;
+ }
+ return false;
+}
+
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo,
@@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
auto *VecTy = getWidenedType(ScalarTy, Sz);
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (!IsSorted) {
- if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
- if (const SCEV *Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order);
- Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
- SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
- SPtrInfo.StrideSCEV = Stride;
- return LoadsState::StridedVectorize;
- }
- }
+ if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order,
+ SPtrInfo))
+ return LoadsState::StridedVectorize;
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment))
@@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis {
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
BasicBlock *Parent = nullptr;
// Checks if the instruction has supported opcode.
- auto IsSupportedInstruction = [&](Instruction *I) {
+ auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) {
+ if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+ return false;
return I && isSupportedOpcode(I->getOpcode()) &&
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
};
@@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis {
// will be unable to schedule anyway.
SmallDenseSet<Value *, 8> Operands;
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
+ bool AnyUndef = false;
for (Value *V : VL) {
auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!I) {
+ AnyUndef |= isa<UndefValue>(V);
continue;
+ }
if (!DT.isReachableFromEntry(I->getParent()))
continue;
if (Candidates.empty()) {
@@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis {
if (P.second.size() < BestOpcodeNum)
continue;
for (Instruction *I : P.second) {
- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
+ if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
MainOp = I;
BestOpcodeNum = P.second.size();
break;