aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp8
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp8
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp24
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp5
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h22
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp37
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp208
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp20
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp221
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp9
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/InstructionNamer.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp16
-rw-r--r--llvm/lib/Transforms/Utils/LowerInvoke.cpp26
-rw-r--r--llvm/lib/Transforms/Utils/MisExpect.cpp61
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp10
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp61
22 files changed, 381 insertions, 393 deletions
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index cfdfd94..5066a99 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1033,19 +1033,17 @@ private:
};
} // namespace
-namespace llvm {
template <>
-struct DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<typename CallsiteContextGraph<
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
: public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
template <>
-struct DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<typename CallsiteContextGraph<
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
: public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
-struct DenseMapInfo<IndexCall>
+struct llvm::DenseMapInfo<IndexCall>
: public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
-} // end namespace llvm
namespace {
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index ac41fdd..2d5cb82 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -372,9 +372,7 @@ struct VTableSlot {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<VTableSlot> {
+template <> struct llvm::DenseMapInfo<VTableSlot> {
static VTableSlot getEmptyKey() {
return {DenseMapInfo<Metadata *>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
@@ -393,7 +391,7 @@ template <> struct DenseMapInfo<VTableSlot> {
}
};
-template <> struct DenseMapInfo<VTableSlotSummary> {
+template <> struct llvm::DenseMapInfo<VTableSlotSummary> {
static VTableSlotSummary getEmptyKey() {
return {DenseMapInfo<StringRef>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
@@ -412,8 +410,6 @@ template <> struct DenseMapInfo<VTableSlotSummary> {
}
};
-} // end namespace llvm
-
// Returns true if the function must be unreachable based on ValueInfo.
//
// In particular, identifies a function as unreachable in the following
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 59e103cd..73ec451 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -880,11 +880,13 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
+ return createSelectInstWithUnknownProfile(X, InstCombiner::AddOne(Op1C),
+ Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
+ return createSelectInstWithUnknownProfile(X, InstCombiner::SubOne(Op1C),
+ Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9b272c4..3ddf182 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -28,6 +28,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
/// This is the complement of getICmpCode, which turns an opcode and two
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
@@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) {
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd, bool IsLogical,
InstCombiner::BuilderTy &Builder,
- const SimplifyQuery &Q) {
+ const SimplifyQuery &Q,
+ Instruction &I) {
// Match an equality compare with a non-poison constant as Cmp0.
// Also, give up if the compare can be constant-folded to avoid looping.
CmpPredicate Pred0;
@@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
}
- if (IsLogical)
- return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp)
- : Builder.CreateLogicalOr(Cmp0, SubstituteCmp);
+ if (IsLogical) {
+ Instruction *MDFrom =
+ ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I;
+ return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom)
+ : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom);
+ }
return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
SubstituteCmp);
}
@@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
/*IsLogical*/ false, Builder))
return V;
- if (Value *V =
- foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q))
+ if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical,
+ Builder, Q, I))
return V;
// We can convert this case to bitwise and, because both operands are used
// on the LHS, and as such poison from both will propagate.
- if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd,
- /*IsLogical=*/false, Builder, Q)) {
+ if (Value *V = foldAndOrOfICmpsWithConstEq(
+ RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) {
// If RHS is still used, we should drop samesign flag.
if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) {
RHS->setSameSign(false);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 56194fe..4c9b10a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2202,6 +2202,11 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return commonCastTransforms(CI);
}
+Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) {
+ // FIXME: Implement variants of ptrtoint folds.
+ return commonCastTransforms(CI);
+}
+
/// This input value (which is known to have vector type) is being zero extended
/// or truncated to the specified vector type. Since the zext/trunc is done
/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index e01c145..7071876 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -143,6 +143,7 @@ public:
Instruction *visitUIToFP(CastInst &CI);
Instruction *visitSIToFP(CastInst &CI);
Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitPtrToAddr(PtrToAddrInst &CI);
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
@@ -470,18 +471,19 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
- SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
- const Twine &NameStr = "",
- InsertPosition InsertBefore = nullptr,
- Instruction *MDFrom = nullptr) {
- SelectInst *SI =
- SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
- if (!MDFrom)
- setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
- return SI;
+public:
+ /// Create `select C, S1, S2`. Use only when the profile cannot be calculated
+ /// from existing profile metadata: if the Function has profiles, this will
+ /// set the profile of this select to "unknown".
+ SelectInst *
+ createSelectInstWithUnknownProfile(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ InsertPosition InsertBefore = nullptr) {
+ auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE);
+ return Sel;
}
-public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 5c747bb..9815644 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1069,27 +1069,22 @@ struct LoweredPHIRecord {
};
} // namespace
-namespace llvm {
- template<>
- struct DenseMapInfo<LoweredPHIRecord> {
- static inline LoweredPHIRecord getEmptyKey() {
- return LoweredPHIRecord(nullptr, 0);
- }
- static inline LoweredPHIRecord getTombstoneKey() {
- return LoweredPHIRecord(nullptr, 1);
- }
- static unsigned getHashValue(const LoweredPHIRecord &Val) {
- return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
- (Val.Width>>3);
- }
- static bool isEqual(const LoweredPHIRecord &LHS,
- const LoweredPHIRecord &RHS) {
- return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
- LHS.Width == RHS.Width;
- }
- };
-} // namespace llvm
-
+template <> struct llvm::DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(nullptr, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(nullptr, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode *>::getHashValue(Val.PN) ^ (Val.Shift >> 3) ^
+ (Val.Width >> 3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width;
+ }
+};
/// This is an integer PHI and we know that it has an illegal type: see if it is
/// only used by trunc or trunc(lshr) operations. If so, we split the PHI into
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index d457e0c..899a3c1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1253,7 +1253,8 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
- return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
+ return createSelectInstWithUnknownProfile(X, NewC,
+ ConstantInt::getNullValue(Ty));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 127a506..63e24a0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -107,7 +108,10 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
Value *ShrAmtZ =
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
ShrAmt->getName() + ".z");
- Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
+ // There is no existing !prof metadata we can derive the !prof metadata for
+ // this select.
+ Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper);
+ IC.Builder.Insert(Select);
Select->takeName(I);
return Select;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d56a1af..82ac903 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1737,7 +1737,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
Constant *Zero = ConstantInt::getNullValue(BO.getType());
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
- return createSelectInst(X, TVal, FVal);
+ return createSelectInstWithUnknownProfile(X, TVal, FVal);
}
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3704ad7..860f8f7 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -600,9 +600,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
!IsRISCV64 && !IsLoongArch64 &&
!(Mapping.Offset & (Mapping.Offset - 1)) &&
Mapping.Offset != kDynamicShadowSentinel;
- bool IsAndroidWithIfuncSupport =
- IsAndroid && !TargetTriple.isAndroidVersionLT(21);
- Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb;
+ Mapping.InGlobal = ClWithIfunc && IsAndroid && IsArmOrThumb;
return Mapping;
}
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 3f7003d..ff5f390 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -148,19 +148,16 @@ public:
class DFAJumpThreading {
public:
- DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
+ DFAJumpThreading(AssumptionCache *AC, DomTreeUpdater *DTU, LoopInfo *LI,
TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
- : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
+ : AC(AC), DTU(DTU), LI(LI), TTI(TTI), ORE(ORE) {}
bool run(Function &F);
bool LoopInfoBroken;
private:
void
- unfoldSelectInstrs(DominatorTree *DT,
- const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
- // TODO: Have everything use a single lazy DTU
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ unfoldSelectInstrs(const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
while (!Stack.empty()) {
@@ -168,7 +165,7 @@ private:
std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
- unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
+ unfold(DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
// Put newly discovered select instructions into the work list.
llvm::append_range(Stack, NewSIsToUnfold);
@@ -181,7 +178,7 @@ private:
std::vector<BasicBlock *> *NewBBs);
AssumptionCache *AC;
- DominatorTree *DT;
+ DomTreeUpdater *DTU;
LoopInfo *LI;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
@@ -401,6 +398,10 @@ struct ThreadingPath {
ExitVal = V->getValue();
IsExitValSet = true;
}
+ void setExitValue(const APInt &V) {
+ ExitVal = V;
+ IsExitValSet = true;
+ }
bool isExitValueSet() const { return IsExitValSet; }
/// Determinator is the basic block that determines the next state of the DFA.
@@ -583,44 +584,8 @@ struct AllSwitchPaths {
BasicBlock *getSwitchBlock() { return SwitchBlock; }
void run() {
- StateDefMap StateDef = getStateDefMap();
- if (StateDef.empty()) {
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
- Switch)
- << "Switch instruction is not predictable.";
- });
- return;
- }
-
- auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
- auto *SwitchPhiDefBB = SwitchPhi->getParent();
- VisitedBlocks VB;
- // Get paths from the determinator BBs to SwitchPhiDefBB
- std::vector<ThreadingPath> PathsToPhiDef =
- getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
- if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
- TPaths = std::move(PathsToPhiDef);
- return;
- }
-
- assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
- auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
- // Find and append paths from SwitchPhiDefBB to SwitchBlock.
- PathsType PathsToSwitchBB =
- paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
- if (PathsToSwitchBB.empty())
- return;
-
- std::vector<ThreadingPath> TempList;
- for (const ThreadingPath &Path : PathsToPhiDef) {
- for (const PathType &PathToSw : PathsToSwitchBB) {
- ThreadingPath PathCopy(Path);
- PathCopy.appendExcludingFirst(PathToSw);
- TempList.push_back(PathCopy);
- }
- }
- TPaths = std::move(TempList);
+ findTPaths();
+ unifyTPaths();
}
private:
@@ -812,21 +777,98 @@ private:
return Res;
}
+ // Find all threadable paths.
+ void findTPaths() {
+ StateDefMap StateDef = getStateDefMap();
+ if (StateDef.empty()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
+ Switch)
+ << "Switch instruction is not predictable.";
+ });
+ return;
+ }
+
+ auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
+ auto *SwitchPhiDefBB = SwitchPhi->getParent();
+ VisitedBlocks VB;
+ // Get paths from the determinator BBs to SwitchPhiDefBB
+ std::vector<ThreadingPath> PathsToPhiDef =
+ getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
+ if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
+ TPaths = std::move(PathsToPhiDef);
+ return;
+ }
+
+ assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
+ auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
+ // Find and append paths from SwitchPhiDefBB to SwitchBlock.
+ PathsType PathsToSwitchBB =
+ paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
+ if (PathsToSwitchBB.empty())
+ return;
+
+ std::vector<ThreadingPath> TempList;
+ for (const ThreadingPath &Path : PathsToPhiDef) {
+ for (const PathType &PathToSw : PathsToSwitchBB) {
+ ThreadingPath PathCopy(Path);
+ PathCopy.appendExcludingFirst(PathToSw);
+ TempList.push_back(PathCopy);
+ }
+ }
+ TPaths = std::move(TempList);
+ }
+
+ /// Fast helper to get the successor corresponding to a particular case value
+ /// for a switch statement.
+ BasicBlock *getNextCaseSuccessor(const APInt &NextState) {
+ // Precompute the value => successor mapping
+ if (CaseValToDest.empty()) {
+ for (auto Case : Switch->cases()) {
+ APInt CaseVal = Case.getCaseValue()->getValue();
+ CaseValToDest[CaseVal] = Case.getCaseSuccessor();
+ }
+ }
+
+ auto SuccIt = CaseValToDest.find(NextState);
+ return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest()
+ : SuccIt->second;
+ }
+
+ // Two states are equivalent if they have the same switch destination.
+ // Unify the states in different threading path if the states are equivalent.
+ void unifyTPaths() {
+ SmallDenseMap<BasicBlock *, APInt> DestToState;
+ for (ThreadingPath &Path : TPaths) {
+ APInt NextState = Path.getExitValue();
+ BasicBlock *Dest = getNextCaseSuccessor(NextState);
+ auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState);
+ if (Inserted)
+ continue;
+ if (NextState != StateIt->second) {
+ LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to "
+ << StateIt->second << "\n");
+ Path.setExitValue(StateIt->second);
+ }
+ }
+ }
+
unsigned NumVisited = 0;
SwitchInst *Switch;
BasicBlock *SwitchBlock;
OptimizationRemarkEmitter *ORE;
std::vector<ThreadingPath> TPaths;
+ DenseMap<APInt, BasicBlock *> CaseValToDest;
LoopInfo *LI;
Loop *SwitchOuterLoop;
};
struct TransformDFA {
- TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
+ TransformDFA(AllSwitchPaths *SwitchPaths, DomTreeUpdater *DTU,
AssumptionCache *AC, TargetTransformInfo *TTI,
OptimizationRemarkEmitter *ORE,
SmallPtrSet<const Value *, 32> EphValues)
- : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
+ : SwitchPaths(SwitchPaths), DTU(DTU), AC(AC), TTI(TTI), ORE(ORE),
EphValues(EphValues) {}
bool run() {
@@ -1002,19 +1044,16 @@ private:
SmallPtrSet<BasicBlock *, 16> BlocksToClean;
BlocksToClean.insert_range(successors(SwitchBlock));
- {
- DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
- for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
- createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
- NumPaths++;
- }
-
- // After all paths are cloned, now update the last successor of the cloned
- // path so it skips over the switch statement
- for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
- updateLastSuccessor(TPath, DuplicateMap, &DTU);
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
+ createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, DTU);
+ NumPaths++;
}
+ // After all paths are cloned, now update the last successor of the cloned
+ // path so it skips over the switch statement
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
+ updateLastSuccessor(TPath, DuplicateMap, DTU);
+
// For each instruction that was cloned and used outside, update its uses
updateSSA(NewDefs);
@@ -1118,7 +1157,25 @@ private:
}
// SSAUpdater handles phi placement and renaming uses with the appropriate
// value.
- SSAUpdate.RewriteAllUses(DT);
+ SSAUpdate.RewriteAllUses(&DTU->getDomTree());
+ }
+
+ /// Helper to get the successor corresponding to a particular case value for
+ /// a switch statement.
+ /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch)
+ /// by updating cached value => successor mapping during threading.
+ static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
+ const APInt &NextState) {
+ BasicBlock *NextCase = nullptr;
+ for (auto Case : Switch->cases()) {
+ if (Case.getCaseValue()->getValue() == NextState) {
+ NextCase = Case.getCaseSuccessor();
+ break;
+ }
+ }
+ if (!NextCase)
+ NextCase = Switch->getDefaultDest();
+ return NextCase;
}
/// Clones a basic block, and adds it to the CFG.
@@ -1335,28 +1392,13 @@ private:
return It != ClonedBBs.end() ? (*It).BB : nullptr;
}
- /// Helper to get the successor corresponding to a particular case value for
- /// a switch statement.
- BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
- BasicBlock *NextCase = nullptr;
- for (auto Case : Switch->cases()) {
- if (Case.getCaseValue()->getValue() == NextState) {
- NextCase = Case.getCaseSuccessor();
- break;
- }
- }
- if (!NextCase)
- NextCase = Switch->getDefaultDest();
- return NextCase;
- }
-
/// Returns true if IncomingBB is a predecessor of BB.
bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
return llvm::is_contained(predecessors(BB), IncomingBB);
}
AllSwitchPaths *SwitchPaths;
- DominatorTree *DT;
+ DomTreeUpdater *DTU;
AssumptionCache *AC;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
@@ -1399,7 +1441,7 @@ bool DFAJumpThreading::run(Function &F) {
<< "candidate for jump threading\n");
LLVM_DEBUG(SI->dump());
- unfoldSelectInstrs(DT, Switch.getSelectInsts());
+ unfoldSelectInstrs(Switch.getSelectInsts());
if (!Switch.getSelectInsts().empty())
MadeChanges = true;
@@ -1421,7 +1463,7 @@ bool DFAJumpThreading::run(Function &F) {
}
#ifdef NDEBUG
- LI->verify(*DT);
+ LI->verify(DTU->getDomTree());
#endif
SmallPtrSet<const Value *, 32> EphValues;
@@ -1429,13 +1471,15 @@ bool DFAJumpThreading::run(Function &F) {
CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
- TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
+ TransformDFA Transform(&SwitchPaths, DTU, AC, TTI, ORE, EphValues);
if (Transform.run())
MadeChanges = LoopInfoBroken = true;
}
+ DTU->flush();
+
#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Full));
+ assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full));
verifyFunction(F, &dbgs());
#endif
@@ -1450,7 +1494,9 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
OptimizationRemarkEmitter ORE(&F);
- DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ DFAJumpThreading ThreadImpl(&AC, &DTU, &LI, &TTI, &ORE);
if (!ThreadImpl.run(F))
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 0f8cc6c..2afa7b7 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -108,7 +108,7 @@ struct SimpleValue {
// of instruction handled below (UnaryOperator, etc.).
if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
if (Function *F = CI->getCalledFunction()) {
- switch ((Intrinsic::ID)F->getIntrinsicID()) {
+ switch (F->getIntrinsicID()) {
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
@@ -154,9 +154,7 @@ struct SimpleValue {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<SimpleValue> {
+template <> struct llvm::DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
@@ -169,8 +167,6 @@ template <> struct DenseMapInfo<SimpleValue> {
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
-} // end namespace llvm
-
/// Match a 'select' including an optional 'not's of the condition.
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
Value *&B,
@@ -509,9 +505,7 @@ struct CallValue {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<CallValue> {
+template <> struct llvm::DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
@@ -524,8 +518,6 @@ template <> struct DenseMapInfo<CallValue> {
static bool isEqual(CallValue LHS, CallValue RHS);
};
-} // end namespace llvm
-
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
@@ -580,9 +572,7 @@ struct GEPValue {
} // namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<GEPValue> {
+template <> struct llvm::DenseMapInfo<GEPValue> {
static inline GEPValue getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
@@ -595,8 +585,6 @@ template <> struct DenseMapInfo<GEPValue> {
static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
};
-} // end namespace llvm
-
unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
auto *GEP = cast<GetElementPtrInst>(Val.Inst);
if (Val.ConstantOffset.has_value())
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 638952a..42db424 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -170,9 +170,7 @@ struct llvm::GVNPass::Expression {
}
};
-namespace llvm {
-
-template <> struct DenseMapInfo<GVNPass::Expression> {
+template <> struct llvm::DenseMapInfo<GVNPass::Expression> {
static inline GVNPass::Expression getEmptyKey() { return ~0U; }
static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
@@ -188,8 +186,6 @@ template <> struct DenseMapInfo<GVNPass::Expression> {
}
};
-} // end namespace llvm
-
/// Represents a particular available value that we know how to materialize.
/// Materialization of an AvailableValue never fails. An AvailableValue is
/// implicitly associated with a rematerialization point which is the
@@ -2084,13 +2080,6 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) {
return Changed;
}
-static bool hasUsersIn(Value *V, BasicBlock *BB) {
- return any_of(V->users(), [BB](User *U) {
- auto *I = dyn_cast<Instruction>(U);
- return I && I->getParent() == BB;
- });
-}
-
bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
Value *V = IntrinsicI->getArgOperand(0);
@@ -2149,85 +2138,7 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
}
Constant *True = ConstantInt::getTrue(V->getContext());
- bool Changed = false;
-
- for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
- BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
-
- // This property is only true in dominated successors, propagateEquality
- // will check dominance for us.
- Changed |= propagateEquality(V, True, Edge, false);
- }
-
- // We can replace assume value with true, which covers cases like this:
- // call void @llvm.assume(i1 %cmp)
- // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
- ReplaceOperandsWithMap[V] = True;
-
- // Similarly, after assume(!NotV) we know that NotV == false.
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))))
- ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
-
- // If we find an equality fact, canonicalize all dominated uses in this block
- // to one of the two values. We heuristically choice the "oldest" of the
- // two where age is determined by value number. (Note that propagateEquality
- // above handles the cross block case.)
- //
- // Key case to cover are:
- // 1)
- // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
- // call void @llvm.assume(i1 %cmp)
- // ret float %0 ; will change it to ret float 3.000000e+00
- // 2)
- // %load = load float, float* %addr
- // %cmp = fcmp oeq float %load, %0
- // call void @llvm.assume(i1 %cmp)
- // ret float %load ; will change it to ret float %0
- if (auto *CmpI = dyn_cast<CmpInst>(V)) {
- if (CmpI->isEquivalence()) {
- Value *CmpLHS = CmpI->getOperand(0);
- Value *CmpRHS = CmpI->getOperand(1);
- // Heuristically pick the better replacement -- the choice of heuristic
- // isn't terribly important here, but the fact we canonicalize on some
- // replacement is for exposing other simplifications.
- // TODO: pull this out as a helper function and reuse w/ existing
- // (slightly different) logic.
- if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
- std::swap(CmpLHS, CmpRHS);
- if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
- std::swap(CmpLHS, CmpRHS);
- if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
- (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
- // Move the 'oldest' value to the right-hand side, using the value
- // number as a proxy for age.
- uint32_t LVN = VN.lookupOrAdd(CmpLHS);
- uint32_t RVN = VN.lookupOrAdd(CmpRHS);
- if (LVN < RVN)
- std::swap(CmpLHS, CmpRHS);
- }
-
- // Handle degenerate case where we either haven't pruned a dead path or a
- // removed a trivial assume yet.
- if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
- return Changed;
-
- LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
- << *CmpLHS << " with "
- << *CmpRHS << " in block "
- << IntrinsicI->getParent()->getName() << "\n");
-
- // Setup the replacement map - this handles uses within the same block.
- if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
- ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
-
- // NOTE: The non-block local cases are handled by the call to
- // propagateEquality above; this block is just about handling the block
- // local cases. TODO: There's a bunch of logic in propagateEqualiy which
- // isn't duplicated for the block local case, can we share it somehow?
- }
- }
- return Changed;
+ return propagateEquality(V, True, IntrinsicI);
}
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
@@ -2245,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) {
if (!L->isUnordered())
return false;
+ if (L->getType()->isTokenLikeTy())
+ return false;
+
if (L->use_empty()) {
salvageAndRemoveInstruction(L);
return true;
@@ -2526,39 +2440,28 @@ void GVNPass::assignBlockRPONumber(Function &F) {
InvalidBlockRPONumbers = false;
}
-bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
- bool Changed = false;
- for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
- Use &Operand = Instr->getOperandUse(OpNum);
- auto It = ReplaceOperandsWithMap.find(Operand.get());
- if (It != ReplaceOperandsWithMap.end()) {
- const DataLayout &DL = Instr->getDataLayout();
- if (!canReplacePointersInUseIfEqual(Operand, It->second, DL))
- continue;
-
- LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
- << *It->second << " in instruction " << *Instr << '\n');
- Instr->setOperand(OpNum, It->second);
- Changed = true;
- }
- }
- return Changed;
-}
-
-/// The given values are known to be equal in every block
+/// The given values are known to be equal in every use
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-/// If DominatesByEdge is false, then it means that we will propagate the RHS
-/// value starting from the end of Root.Start.
-bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
- const BasicBlockEdge &Root,
- bool DominatesByEdge) {
+/// The Root may either be a basic block edge (for conditions) or an
+/// instruction (for assumes).
+bool GVNPass::propagateEquality(
+ Value *LHS, Value *RHS,
+ const std::variant<BasicBlockEdge, Instruction *> &Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
- // For speed, compute a conservative fast approximation to
- // DT->dominates(Root, Root.getEnd());
- const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
+ SmallVector<const BasicBlock *> DominatedBlocks;
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
+ // For speed, compute a conservative fast approximation to
+ // DT->dominates(Root, Root.getEnd());
+ if (isOnlyReachableViaThisEdge(*Edge, DT))
+ DominatedBlocks.push_back(Edge->getEnd());
+ } else {
+ Instruction *I = std::get<Instruction *>(Root);
+ for (const auto *Node : DT->getNode(I->getParent())->children())
+ DominatedBlocks.push_back(Node->getBlock());
+ }
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2606,9 +2509,9 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
// using the leader table is about compiling faster, not optimizing better).
// The leader table only tracks basic blocks, not edges. Only add to if we
// have the simple case where the edge dominates the end.
- if (RootDominatesEnd && !isa<Instruction>(RHS) &&
- canReplacePointersIfEqual(LHS, RHS, DL))
- LeaderTable.insert(LVN, RHS, Root.getEnd());
+ if (!isa<Instruction>(RHS) && canReplacePointersIfEqual(LHS, RHS, DL))
+ for (const BasicBlock *BB : DominatedBlocks)
+ LeaderTable.insert(LVN, RHS, BB);
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@@ -2618,12 +2521,14 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
return canReplacePointersInUseIfEqual(U, To, DL);
};
- unsigned NumReplacements =
- DominatesByEdge
- ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
- CanReplacePointersCallBack)
- : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
- CanReplacePointersCallBack);
+ unsigned NumReplacements;
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
+ NumReplacements = replaceDominatedUsesWithIf(
+ LHS, RHS, *DT, *Edge, CanReplacePointersCallBack);
+ else
+ NumReplacements = replaceDominatedUsesWithIf(
+ LHS, RHS, *DT, std::get<Instruction *>(Root),
+ CanReplacePointersCallBack);
if (NumReplacements > 0) {
Changed = true;
@@ -2682,26 +2587,45 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
- Value *NotCmp = findLeader(Root.getEnd(), Num);
- if (NotCmp && isa<Instruction>(NotCmp)) {
- unsigned NumReplacements =
- DominatesByEdge
- ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
- : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
- Root.getStart());
- Changed |= NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
- // Cached information for anything that uses NotCmp will be invalid.
- if (MD)
- MD->invalidateCachedPointerInfo(NotCmp);
+ for (const auto &Entry : LeaderTable.getLeaders(Num)) {
+ // Only look at leaders that either dominate the start of the edge,
+ // or are dominated by the end. This check is not necessary for
+ // correctness, it only discards cases for which the following
+ // use replacement will not work anyway.
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
+ if (!DT->dominates(Entry.BB, Edge->getStart()) &&
+ !DT->dominates(Edge->getEnd(), Entry.BB))
+ continue;
+ } else {
+ auto *InstBB = std::get<Instruction *>(Root)->getParent();
+ if (!DT->dominates(Entry.BB, InstBB) &&
+ !DT->dominates(InstBB, Entry.BB))
+ continue;
+ }
+
+ Value *NotCmp = Entry.Val;
+ if (NotCmp && isa<Instruction>(NotCmp)) {
+ unsigned NumReplacements;
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
+ NumReplacements =
+ replaceDominatedUsesWith(NotCmp, NotVal, *DT, *Edge);
+ else
+ NumReplacements = replaceDominatedUsesWith(
+ NotCmp, NotVal, *DT, std::get<Instruction *>(Root));
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses NotCmp will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(NotCmp);
+ }
}
}
// Ensure that any instruction in scope that gets the "A < B" value number
// is replaced with false.
// The leader table only tracks basic blocks, not edges. Only add to if we
// have the simple case where the edge dominates the end.
- if (RootDominatesEnd)
- LeaderTable.insert(Num, NotVal, Root.getEnd());
+ for (const BasicBlock *BB : DominatedBlocks)
+ LeaderTable.insert(Num, NotVal, BB);
continue;
}
@@ -2789,11 +2713,11 @@ bool GVNPass::processInstruction(Instruction *I) {
Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
BasicBlockEdge TrueE(Parent, TrueSucc);
- Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
BasicBlockEdge FalseE(Parent, FalseSucc);
- Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
return Changed;
}
@@ -2814,7 +2738,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E, true);
+ Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E);
}
}
return Changed;
@@ -2942,8 +2866,6 @@ bool GVNPass::processBlock(BasicBlock *BB) {
if (DeadBlocks.count(BB))
return false;
- // Clearing map before every BB because it can be used only for single BB.
- ReplaceOperandsWithMap.clear();
bool ChangedFunction = false;
// Since we may not have visited the input blocks of the phis, we can't
@@ -2955,11 +2877,8 @@ bool GVNPass::processBlock(BasicBlock *BB) {
for (PHINode *PN : PHINodesToRemove) {
removeInstruction(PN);
}
- for (Instruction &Inst : make_early_inc_range(*BB)) {
- if (!ReplaceOperandsWithMap.empty())
- ChangedFunction |= replaceOperandsForInBlockEquality(&Inst);
+ for (Instruction &Inst : make_early_inc_range(*BB))
ChangedFunction |= processInstruction(&Inst);
- }
return ChangedFunction;
}
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 3c1a8ba..80aa98d 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -434,10 +434,6 @@ private:
int StoreCount = 0;
};
-} // end anonymous namespace
-
-namespace llvm {
-
struct ExactEqualsExpression {
const Expression &E;
@@ -449,8 +445,9 @@ struct ExactEqualsExpression {
return E.exactlyEquals(Other);
}
};
+} // end anonymous namespace
-template <> struct DenseMapInfo<const Expression *> {
+template <> struct llvm::DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
@@ -493,8 +490,6 @@ template <> struct DenseMapInfo<const Expression *> {
}
};
-} // end namespace llvm
-
namespace {
class NewGVN {
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 2190dcd..a87822c 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -84,10 +84,6 @@ public:
bool run();
};
-} // anonymous namespace
-
-namespace llvm {
-
struct FrozenIndPHIInfo {
// A freeze instruction that uses an induction phi
FreezeInst *FI = nullptr;
@@ -103,7 +99,9 @@ struct FrozenIndPHIInfo {
bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; }
};
-template <> struct DenseMapInfo<FrozenIndPHIInfo> {
+} // namespace
+
+template <> struct llvm::DenseMapInfo<FrozenIndPHIInfo> {
static inline FrozenIndPHIInfo getEmptyKey() {
return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(),
DenseMapInfo<BinaryOperator *>::getEmptyKey());
@@ -124,8 +122,6 @@ template <> struct DenseMapInfo<FrozenIndPHIInfo> {
};
};
-} // end namespace llvm
-
// Given U = (value, user), replace value with freeze(value), and let
// SCEV forget user. The inserted freeze is placed in the preheader.
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 3ae570c..4f1ff7b 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-void nameInstructions(Function &F) {
- for (auto &Arg : F.args()) {
+static void nameInstructions(Function &F) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasName())
Arg.setName("arg");
}
@@ -38,8 +37,6 @@ void nameInstructions(Function &F) {
}
}
-} // namespace
-
PreservedAnalyses InstructionNamerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
nameInstructions(F);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b6ca52e..46f2903 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3246,6 +3246,13 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
return ::replaceDominatedUsesWith(From, To, Dominates);
}
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const Instruction *I) {
+ auto Dominates = [&](const Use &U) { return DT.dominates(I, U); };
+ return ::replaceDominatedUsesWith(From, To, Dominates);
+}
+
unsigned llvm::replaceDominatedUsesWithIf(
Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root,
function_ref<bool(const Use &U, const Value *To)> ShouldReplace) {
@@ -3264,6 +3271,15 @@ unsigned llvm::replaceDominatedUsesWithIf(
return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace);
}
+unsigned llvm::replaceDominatedUsesWithIf(
+ Value *From, Value *To, DominatorTree &DT, const Instruction *I,
+ function_ref<bool(const Use &U, const Value *To)> ShouldReplace) {
+ auto DominatesAndShouldReplace = [&](const Use &U) {
+ return DT.dominates(I, U) && ShouldReplace(U, To);
+ };
+ return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace);
+}
+
bool llvm::callsGCLeafFunction(const CallBase *Call,
const TargetLibraryInfo &TLI) {
// Check if the function is specifically marked as a gc leaf function.
diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index ff2ab3c..cecb662 100644
--- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -27,15 +27,15 @@ using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
namespace {
- class LowerInvokeLegacyPass : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
- initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- };
-}
+class LowerInvokeLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
+ initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
char LowerInvokeLegacyPass::ID = 0;
INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke",
@@ -78,11 +78,12 @@ bool LowerInvokeLegacyPass::runOnFunction(Function &F) {
return runImpl(F);
}
-namespace llvm {
-char &LowerInvokePassID = LowerInvokeLegacyPass::ID;
+char &llvm::LowerInvokePassID = LowerInvokeLegacyPass::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); }
+FunctionPass *llvm::createLowerInvokePass() {
+ return new LowerInvokeLegacyPass();
+}
PreservedAnalyses LowerInvokePass::run(Function &F,
FunctionAnalysisManager &AM) {
@@ -92,4 +93,3 @@ PreservedAnalyses LowerInvokePass::run(Function &F,
return PreservedAnalyses::none();
}
-}
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp
index ca7e09d..1585e9e 100644
--- a/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -48,8 +48,6 @@
using namespace llvm;
using namespace misexpect;
-namespace llvm {
-
// Command line option to enable/disable the warning when profile data suggests
// a mismatch with the use of the llvm.expect intrinsic
static cl::opt<bool> PGOWarnMisExpect(
@@ -63,22 +61,18 @@ static cl::opt<uint32_t> MisExpectTolerance(
cl::desc("Prevents emitting diagnostics when profile counts are "
"within N% of the threshold.."));
-} // namespace llvm
-
-namespace {
-
-bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
+static bool isMisExpectDiagEnabled(const LLVMContext &Ctx) {
return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
}
-uint32_t getMisExpectTolerance(LLVMContext &Ctx) {
+static uint32_t getMisExpectTolerance(const LLVMContext &Ctx) {
return std::max(static_cast<uint32_t>(MisExpectTolerance),
Ctx.getDiagnosticsMisExpectTolerance());
}
-Instruction *getInstCondition(Instruction *I) {
+static const Instruction *getInstCondition(const Instruction *I) {
assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
- Instruction *Ret = nullptr;
+ const Instruction *Ret = nullptr;
if (auto *B = dyn_cast<BranchInst>(I)) {
Ret = dyn_cast<Instruction>(B->getCondition());
}
@@ -97,8 +91,8 @@ Instruction *getInstCondition(Instruction *I) {
return Ret ? Ret : I;
}
-void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
- uint64_t ProfCount, uint64_t TotalCount) {
+static void emitMisexpectDiagnostic(const Instruction *I, LLVMContext &Ctx,
+ uint64_t ProfCount, uint64_t TotalCount) {
double PercentageCorrect = (double)ProfCount / TotalCount;
auto PerString =
formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
@@ -106,20 +100,16 @@ void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
"Potential performance regression from use of the llvm.expect intrinsic: "
"Annotation was correct on {0} of profiled executions.",
PerString);
- Instruction *Cond = getInstCondition(I);
+ const Instruction *Cond = getInstCondition(I);
if (isMisExpectDiagEnabled(Ctx))
Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Twine(PerString)));
OptimizationRemarkEmitter ORE(I->getParent()->getParent());
ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
}
-} // namespace
-
-namespace llvm {
-namespace misexpect {
-
-void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
- ArrayRef<uint32_t> ExpectedWeights) {
+void misexpect::verifyMisExpect(const Instruction &I,
+ ArrayRef<uint32_t> RealWeights,
+ ArrayRef<uint32_t> ExpectedWeights) {
// To determine if we emit a diagnostic, we need to compare the branch weights
// from the profile to those added by the llvm.expect intrinsic.
// So first, we extract the "likely" and "unlikely" weights from
@@ -128,15 +118,13 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
uint64_t LikelyBranchWeight = 0,
UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
size_t MaxIndex = 0;
- for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
- uint32_t V = ExpectedWeights[Idx];
+ for (const auto &[Idx, V] : enumerate(ExpectedWeights)) {
if (LikelyBranchWeight < V) {
LikelyBranchWeight = V;
MaxIndex = Idx;
}
- if (UnlikelyBranchWeight > V) {
+ if (UnlikelyBranchWeight > V)
UnlikelyBranchWeight = V;
- }
}
const uint64_t ProfiledWeight = RealWeights[MaxIndex];
@@ -161,7 +149,7 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);
// clamp tolerance range to [0, 100)
- auto Tolerance = getMisExpectTolerance(I.getContext());
+ uint32_t Tolerance = getMisExpectTolerance(I.getContext());
Tolerance = std::clamp(Tolerance, 0u, 99u);
// Allow users to relax checking by N% i.e., if they use a 5% tolerance,
@@ -175,8 +163,8 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
RealWeightsTotal);
}
-void checkBackendInstrumentation(Instruction &I,
- const ArrayRef<uint32_t> RealWeights) {
+void misexpect::checkBackendInstrumentation(const Instruction &I,
+ ArrayRef<uint32_t> RealWeights) {
// Backend checking assumes any existing weight comes from an `llvm.expect`
// intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple
// times, leading to an invalid assumption in our checking. Backend checks
@@ -190,24 +178,19 @@ void checkBackendInstrumentation(Instruction &I,
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
-void checkFrontendInstrumentation(Instruction &I,
- const ArrayRef<uint32_t> ExpectedWeights) {
+void misexpect::checkFrontendInstrumentation(
+ const Instruction &I, ArrayRef<uint32_t> ExpectedWeights) {
SmallVector<uint32_t> RealWeights;
if (!extractBranchWeights(I, RealWeights))
return;
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
-void checkExpectAnnotations(Instruction &I,
- const ArrayRef<uint32_t> ExistingWeights,
- bool IsFrontend) {
- if (IsFrontend) {
+void misexpect::checkExpectAnnotations(const Instruction &I,
+ ArrayRef<uint32_t> ExistingWeights,
+ bool IsFrontend) {
+ if (IsFrontend)
checkFrontendInstrumentation(I, ExistingWeights);
- } else {
+ else
checkBackendInstrumentation(I, ExistingWeights);
- }
}
-
-} // namespace misexpect
-} // namespace llvm
-#undef DEBUG_TYPE
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 155fcc5..d831c27 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
unsigned PreviousEdges = OtherCases->size();
if (OtherDest == SI->getDefaultDest())
++PreviousEdges;
- for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ unsigned E = PreviousEdges - 1;
+ // Remove all incoming values from OtherDest if OtherDest is unreachable.
+ if (NewBI->isUnconditional())
+ ++E;
+ for (unsigned I = 0; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
@@ -7736,8 +7740,7 @@ struct SwitchSuccWrapper {
DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
};
-namespace llvm {
-template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
+template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> {
static const SwitchSuccWrapper *getEmptyKey() {
return static_cast<SwitchSuccWrapper *>(
DenseMapInfo<void *>::getEmptyKey());
@@ -7805,7 +7808,6 @@ template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
return true;
}
};
-} // namespace llvm
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
DomTreeUpdater *DTU) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cfa8d27..2388375 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2245,6 +2245,26 @@ public:
Align Alignment, const int64_t Diff, Value *Ptr0,
Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with run-time stride).
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ScalarTy type of loads.
+ /// \param CommonAlignment common alignement of loads as computed by
+ /// `computeCommonAlignment<LoadInst>`.
+ /// \param SortedIndicies is a list of indicies computed by this function such
+ /// that the sequence `PointerOps[SortedIndices[0]],
+ /// PointerOps[SortedIndicies[1]], ..., PointerOps[SortedIndices[n]]` is
+ /// ordered by the coefficient of the stride. For example, if PointerOps is
+ /// `%base + %stride, %base, %base + 2 * stride` the `SortedIndices` will be
+ /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be
+ /// `0, 1, 2, 3, ...` we return empty vector for `SortedIndicies`.
+ /// \param SPtrInfo If the function return `true`, it also sets all the fields
+ /// of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const;
+
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
/// \param VL list of loads.
@@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
}
+bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
+ Type *ScalarTy, Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const {
+ const unsigned Sz = PointerOps.size();
+ FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
+ if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
+ !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+ return false;
+ if (const SCEV *Stride =
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
+ SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
+ SPtrInfo.StrideSCEV = Stride;
+ return true;
+ }
+ return false;
+}
+
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo,
@@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
auto *VecTy = getWidenedType(ScalarTy, Sz);
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (!IsSorted) {
- if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
- if (const SCEV *Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order);
- Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
- SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
- SPtrInfo.StrideSCEV = Stride;
- return LoadsState::StridedVectorize;
- }
- }
+ if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order,
+ SPtrInfo))
+ return LoadsState::StridedVectorize;
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment))
@@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis {
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
BasicBlock *Parent = nullptr;
// Checks if the instruction has supported opcode.
- auto IsSupportedInstruction = [&](Instruction *I) {
+ auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) {
+ if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+ return false;
return I && isSupportedOpcode(I->getOpcode()) &&
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
};
@@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis {
// will be unable to schedule anyway.
SmallDenseSet<Value *, 8> Operands;
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
+ bool AnyUndef = false;
for (Value *V : VL) {
auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!I) {
+ AnyUndef |= isa<UndefValue>(V);
continue;
+ }
if (!DT.isReachableFromEntry(I->getParent()))
continue;
if (Candidates.empty()) {
@@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis {
if (P.second.size() < BestOpcodeNum)
continue;
for (Instruction *I : P.second) {
- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
+ if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
MainOp = I;
BestOpcodeNum = P.second.size();
break;