author    NAKAMURA Takumi <geek4civic@gmail.com>  2025-01-10 19:25:56 +0900
committer NAKAMURA Takumi <geek4civic@gmail.com>  2025-01-10 19:25:56 +0900
commit    63f5dc16d6bfca0512fb034052b41d13c3751e20 (patch)
tree      e70266be1fda941e0974e71e3d2c1cf080081311 /llvm/lib/Transforms
parent    9e5734688ed3d5f6b3fb76a26b3d90a736d60781 (diff)
parent    397ac44f623f891d8f05d6673a95984ac0a26671 (diff)
Merge branch 'main' into users/chapuni/cov/single/unify
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/Coroutines/Coroutines.cpp  1
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionSpecialization.cpp  14
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalOpt.cpp  2
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp  49
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp  2
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp  11
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  243
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  5
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp  92
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h  3
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp  23
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  10
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp  38
-rw-r--r--  llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp  2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp  2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp  16
-rw-r--r--  llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp  23
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp  2
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp  2
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp  2
-rw-r--r--  llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp  4
-rw-r--r--  llvm/lib/Transforms/Utils/LoopSimplify.cpp  2
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp  7
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp  119
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  17
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  66
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h  3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp  41
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h  3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VectorCombine.cpp  7
30 files changed, 441 insertions(+), 370 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 240d089..7b59c39 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -69,7 +69,6 @@ static const char *const CoroIntrinsics[] = {
"llvm.coro.async.context.dealloc",
"llvm.coro.async.resume",
"llvm.coro.async.size.replace",
- "llvm.coro.async.store_resume",
"llvm.coro.await.suspend.bool",
"llvm.coro.await.suspend.handle",
"llvm.coro.await.suspend.void",
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 96956481..449d64d 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -66,19 +66,19 @@ static cl::opt<unsigned> MaxCodeSizeGrowth(
"Maximum codesize growth allowed per function"));
static cl::opt<unsigned> MinCodeSizeSavings(
- "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
- "Reject specializations whose codesize savings are less than this"
- "much percent of the original function size"));
+ "funcspec-min-codesize-savings", cl::init(20), cl::Hidden,
+ cl::desc("Reject specializations whose codesize savings are less than this "
+ "much percent of the original function size"));
static cl::opt<unsigned> MinLatencySavings(
"funcspec-min-latency-savings", cl::init(40), cl::Hidden,
- cl::desc("Reject specializations whose latency savings are less than this"
+ cl::desc("Reject specializations whose latency savings are less than this "
"much percent of the original function size"));
static cl::opt<unsigned> MinInliningBonus(
- "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
- "Reject specializations whose inlining bonus is less than this"
- "much percent of the original function size"));
+ "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden,
+ cl::desc("Reject specializations whose inlining bonus is less than this "
+ "much percent of the original function size"));
static cl::opt<bool> SpecializeOnAddress(
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 16a80e9..78cd249 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -105,7 +105,7 @@ static cl::opt<int> ColdCCRelFreq(
"coldcc-rel-freq", cl::Hidden, cl::init(2),
cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
- "entry frequency, for a call site to be considered cold for enabling"
+ "entry frequency, for a call site to be considered cold for enabling "
"coldcc"));
/// Is this global variable possibly used by a leak checker as a root? If so,
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 1bf7ff4..016db55 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -122,6 +122,20 @@ static cl::opt<unsigned>
cl::desc("Max depth to recursively search for missing "
"frames through tail calls."));
+// By default enable cloning of callsites involved with recursive cycles
+static cl::opt<bool> AllowRecursiveCallsites(
+ "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
+ cl::desc("Allow cloning of callsites involved in recursive cycles"));
+
+// When disabled, try to detect and prevent cloning of recursive contexts.
+// This is only necessary until we support cloning through recursive cycles.
+// Leave on by default for now, as disabling requires a little bit of compile
+// time overhead and doesn't affect correctness; it will just inflate the cold
+// hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
+static cl::opt<bool> AllowRecursiveContexts(
+ "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
+ cl::desc("Allow cloning of contexts through recursive cycles"));
+
namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -1236,9 +1250,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
StackEntryIdToContextNodeMap[StackId] = StackNode;
StackNode->OrigStackOrAllocId = StackId;
}
- auto Ins = StackIdSet.insert(StackId);
- if (!Ins.second)
- StackNode->Recursive = true;
+ // Marking a node recursive will prevent its cloning completely, even for
+ // non-recursive contexts flowing through it.
+ if (!AllowRecursiveCallsites) {
+ auto Ins = StackIdSet.insert(StackId);
+ if (!Ins.second)
+ StackNode->Recursive = true;
+ }
StackNode->AllocTypes |= (uint8_t)AllocType;
PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
PrevNode = StackNode;
@@ -1375,8 +1393,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
set_union(CallerEdgeContextIds, Edge->ContextIds);
}
// Node can have more context ids than callers if some contexts terminate at
- // node and some are longer.
- assert(NodeContextIds == CallerEdgeContextIds ||
+ // node and some are longer. If we are allowing recursive callsites but
+ // haven't disabled recursive contexts, this will be violated for
+ // incompletely cloned recursive cycles, so skip the checking in that case.
+ assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
+ NodeContextIds == CallerEdgeContextIds ||
set_is_subset(CallerEdgeContextIds, NodeContextIds));
}
if (Node->CalleeEdges.size()) {
@@ -3370,6 +3391,21 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+ DenseSet<uint32_t> RecursiveContextIds;
+ // If we are allowing recursive callsites, but have also disabled recursive
+ // contexts, look for context ids that show up in multiple caller edges.
+ if (AllowRecursiveCallsites && !AllowRecursiveContexts) {
+ DenseSet<uint32_t> AllCallerContextIds;
+ for (auto &CE : Node->CallerEdges) {
+ // Resize to the largest set of caller context ids, since we know the
+ // final set will be at least that large.
+ AllCallerContextIds.reserve(CE->getContextIds().size());
+ for (auto Id : CE->getContextIds())
+ if (!AllCallerContextIds.insert(Id).second)
+ RecursiveContextIds.insert(Id);
+ }
+ }
+
// Iterate until we find no more opportunities for disambiguating the alloc
// types via cloning. In most cases this loop will terminate once the Node
// has a single allocation type, in which case no more cloning is needed.
@@ -3394,6 +3430,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
// allocation.
auto CallerEdgeContextsForAlloc =
set_intersection(CallerEdge->getContextIds(), AllocContextIds);
+ if (!RecursiveContextIds.empty())
+ CallerEdgeContextsForAlloc =
+ set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
if (CallerEdgeContextsForAlloc.empty()) {
++EI;
continue;
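The new RecursiveContextIds detection in identifyClones relies on the insert-return-value idiom: an id that fails to insert into the set of already-seen caller-edge ids must occur on more than one caller edge, i.e. the context re-enters the callsite. A standalone sketch of that idiom, with std::set standing in for DenseSet:

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      // Context ids gathered across all caller edges of a node; an id that
      // fails to insert was already seen on another edge, so its context is
      // recursive through this callsite.
      std::vector<unsigned> IdsFromAllCallerEdges = {1, 2, 3, 2, 4, 1};
      std::set<unsigned> Seen, Recursive;
      for (unsigned Id : IdsFromAllCallerEdges)
        if (!Seen.insert(Id).second) // .second is false if already present
          Recursive.insert(Id);
      for (unsigned Id : Recursive)
        std::printf("recursive context id: %u\n", Id); // prints 1 and 2
    }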
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b40ab35..67585e9 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -129,7 +129,7 @@ static cl::opt<bool> PrintModuleBeforeOptimizations(
static cl::opt<bool> AlwaysInlineDeviceFunctions(
"openmp-opt-inline-device",
- cl::desc("Inline all applicible functions on the device."), cl::Hidden,
+ cl::desc("Inline all applicable functions on the device."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 603beb3..b978c54 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -162,7 +162,7 @@ static cl::opt<bool> ProfileSampleBlockAccurate(
static cl::opt<bool> ProfileAccurateForSymsInList(
"profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
cl::desc("For symbols in profile symbol list, regard their profiles to "
- "be accurate. It may be overriden by profile-sample-accurate. "));
+ "be accurate. It may be overridden by profile-sample-accurate. "));
static cl::opt<bool> ProfileMergeInlinee(
"sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
@@ -193,9 +193,10 @@ static cl::opt<bool> ProfileSizeInline(
// and inline the hot functions (that are skipped in this pass).
static cl::opt<bool> DisableSampleLoaderInlining(
"disable-sample-loader-inlining", cl::Hidden, cl::init(false),
- cl::desc("If true, artifically skip inline transformation in sample-loader "
- "pass, and merge (or scale) profiles (as configured by "
- "--sample-profile-merge-inlinee)."));
+ cl::desc(
+ "If true, artificially skip inline transformation in sample-loader "
+ "pass, and merge (or scale) profiles (as configured by "
+ "--sample-profile-merge-inlinee)."));
namespace llvm {
cl::opt<bool>
@@ -255,7 +256,7 @@ static cl::opt<unsigned> PrecentMismatchForStalenessError(
static cl::opt<bool> CallsitePrioritizedInline(
"sample-profile-prioritized-inline", cl::Hidden,
- cl::desc("Use call site prioritized inlining for sample profile loader."
+ cl::desc("Use call site prioritized inlining for sample profile loader. "
"Currently only CSSPGO is supported."));
static cl::opt<bool> UsePreInlinerDecision(
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index e9bb2b8..f82a557 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -39,8 +39,7 @@ static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS,
/// This is the complement of getFCmpCode, which turns an opcode and two
/// operands into either a FCmp instruction, or a true/false constant.
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
- InstCombiner::BuilderTy &Builder,
- FastMathFlags FMF) {
+ InstCombiner::BuilderTy &Builder, FMFSource FMF) {
FCmpInst::Predicate NewPred;
if (Constant *TorF = getPredForFCmpCode(Code, LHS->getType(), NewPred))
return TorF;
@@ -514,7 +513,8 @@ static Value *foldLogOpOfMaskedICmpsAsymmetric(
/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
bool IsLogical,
- InstCombiner::BuilderTy &Builder) {
+ InstCombiner::BuilderTy &Builder,
+ const SimplifyQuery &Q) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
std::optional<std::pair<unsigned, unsigned>> MaskPair =
@@ -587,93 +587,107 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
return Builder.CreateICmp(NewCC, NewAnd2, A);
}
- // Remaining cases assume at least that B and D are constant, and depend on
- // their actual values. This isn't strictly necessary, just a "handle the
- // easy cases for now" decision.
const APInt *ConstB, *ConstD;
- if (!match(B, m_APInt(ConstB)) || !match(D, m_APInt(ConstD)))
- return nullptr;
-
- if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
- // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
- // (icmp ne (A & B), B) & (icmp ne (A & D), D)
- // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
- // Only valid if one of the masks is a superset of the other (check "B&D" is
- // the same as either B or D).
- APInt NewMask = *ConstB & *ConstD;
- if (NewMask == *ConstB)
- return LHS;
- else if (NewMask == *ConstD)
- return RHS;
- }
-
- if (Mask & AMask_NotAllOnes) {
- // (icmp ne (A & B), B) & (icmp ne (A & D), D)
- // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
- // Only valid if one of the masks is a superset of the other (check "B|D" is
- // the same as either B or D).
- APInt NewMask = *ConstB | *ConstD;
- if (NewMask == *ConstB)
- return LHS;
- else if (NewMask == *ConstD)
- return RHS;
- }
-
- if (Mask & (BMask_Mixed | BMask_NotMixed)) {
- // Mixed:
- // (icmp eq (A & B), C) & (icmp eq (A & D), E)
- // We already know that B & C == C && D & E == E.
- // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
- // C and E, which are shared by both the mask B and the mask D, don't
- // contradict, then we can transform to
- // -> (icmp eq (A & (B|D)), (C|E))
- // Currently, we only handle the case of B, C, D, and E being constant.
- // We can't simply use C and E because we might actually handle
- // (icmp ne (A & B), B) & (icmp eq (A & D), D)
- // with B and D, having a single bit set.
-
- // NotMixed:
- // (icmp ne (A & B), C) & (icmp ne (A & D), E)
- // -> (icmp ne (A & (B & D)), (C & E))
- // Check the intersection (B & D) for inequality.
- // Assume that (B & D) == B || (B & D) == D, i.e B/D is a subset of D/B
- // and (B & D) & (C ^ E) == 0, bits of C and E, which are shared by both the
- // B and the D, don't contradict.
- // Note that we can assume (~B & C) == 0 && (~D & E) == 0, previous
- // operation should delete these icmps if it hadn't been met.
-
- const APInt *OldConstC, *OldConstE;
- if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
- return nullptr;
-
- auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * {
- CC = IsNot ? CmpInst::getInversePredicate(CC) : CC;
- const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC;
- const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE;
+ if (match(B, m_APInt(ConstB)) && match(D, m_APInt(ConstD))) {
+ if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+ // Only valid if one of the masks is a superset of the other (check "B&D"
+ // is the same as either B or D).
+ APInt NewMask = *ConstB & *ConstD;
+ if (NewMask == *ConstB)
+ return LHS;
+ if (NewMask == *ConstD)
+ return RHS;
+ }
- if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
- return IsNot ? nullptr : ConstantInt::get(LHS->getType(), !IsAnd);
+ if (Mask & AMask_NotAllOnes) {
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
+ // Only valid if one of the masks is a superset of the other (check "B|D"
+ // is the same as either B or D).
+ APInt NewMask = *ConstB | *ConstD;
+ if (NewMask == *ConstB)
+ return LHS;
+ if (NewMask == *ConstD)
+ return RHS;
+ }
- if (IsNot && !ConstB->isSubsetOf(*ConstD) && !ConstD->isSubsetOf(*ConstB))
+ if (Mask & (BMask_Mixed | BMask_NotMixed)) {
+ // Mixed:
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ // We can't simply use C and E because we might actually handle
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // with B and D, having a single bit set.
+
+ // NotMixed:
+ // (icmp ne (A & B), C) & (icmp ne (A & D), E)
+ // -> (icmp ne (A & (B & D)), (C & E))
+ // Check the intersection (B & D) for inequality.
+ // Assume that (B & D) == B || (B & D) == D, i.e B/D is a subset of D/B
+ // and (B & D) & (C ^ E) == 0, bits of C and E, which are shared by both
+ // the B and the D, don't contradict. Note that we can assume (~B & C) ==
+ // 0 && (~D & E) == 0, previous operation should delete these icmps if it
+ // hadn't been met.
+
+ const APInt *OldConstC, *OldConstE;
+ if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
return nullptr;
- APInt BD, CE;
- if (IsNot) {
- BD = *ConstB & *ConstD;
- CE = ConstC & ConstE;
- } else {
- BD = *ConstB | *ConstD;
- CE = ConstC | ConstE;
- }
- Value *NewAnd = Builder.CreateAnd(A, BD);
- Value *CEVal = ConstantInt::get(A->getType(), CE);
- return Builder.CreateICmp(CC, CEVal, NewAnd);
- };
+ auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * {
+ CC = IsNot ? CmpInst::getInversePredicate(CC) : CC;
+ const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC;
+ const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE;
+
+ if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
+ return IsNot ? nullptr : ConstantInt::get(LHS->getType(), !IsAnd);
+
+ if (IsNot && !ConstB->isSubsetOf(*ConstD) &&
+ !ConstD->isSubsetOf(*ConstB))
+ return nullptr;
+
+ APInt BD, CE;
+ if (IsNot) {
+ BD = *ConstB & *ConstD;
+ CE = ConstC & ConstE;
+ } else {
+ BD = *ConstB | *ConstD;
+ CE = ConstC | ConstE;
+ }
+ Value *NewAnd = Builder.CreateAnd(A, BD);
+ Value *CEVal = ConstantInt::get(A->getType(), CE);
+ return Builder.CreateICmp(CC, CEVal, NewAnd);
+ };
+
+ if (Mask & BMask_Mixed)
+ return FoldBMixed(NewCC, false);
+ if (Mask & BMask_NotMixed) // can be else also
+ return FoldBMixed(NewCC, true);
+ }
+ }
- if (Mask & BMask_Mixed)
- return FoldBMixed(NewCC, false);
- if (Mask & BMask_NotMixed) // can be else also
- return FoldBMixed(NewCC, true);
+ // (icmp eq (A & B), 0) | (icmp eq (A & D), 0)
+ // -> (icmp ne (A & (B|D)), (B|D))
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ // iff B and D are known to be powers of two
+ if (Mask & Mask_NotAllZeros &&
+ isKnownToBeAPowerOfTwo(B, /*OrZero=*/false, /*Depth=*/0, Q) &&
+ isKnownToBeAPowerOfTwo(D, /*OrZero=*/false, /*Depth=*/0, Q)) {
+ // If this is a logical and/or, then we must prevent propagation of a
+ // poison value from the RHS by inserting freeze.
+ if (IsLogical)
+ D = Builder.CreateFreeze(D);
+ Value *Mask = Builder.CreateOr(B, D);
+ Value *Masked = Builder.CreateAnd(A, Mask);
+ return Builder.CreateICmp(NewCC, Masked, Mask);
}
return nullptr;
}
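The new power-of-two branch can be checked exhaustively at a fixed width: when B and D are single-bit masks, testing each bit separately is equivalent to testing both at once. A small brute-force verification of the fold (plain C++, not InstCombine code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // For single-bit masks B and D, requiring each bit individually,
      // (A & B) != 0 && (A & D) != 0, is the same as requiring both at
      // once: (A & (B | D)) == (B | D).
      const uint8_t B = 0x04, D = 0x10; // powers of two, as the fold requires
      for (unsigned A = 0; A < 256; ++A) {
        bool Original = (A & B) != 0 && (A & D) != 0;
        bool Folded = (A & (B | D)) == (B | D);
        assert(Original == Folded);
      }
      return 0;
    }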
@@ -776,46 +790,6 @@ foldAndOrOfICmpsWithPow2AndWithZero(InstCombiner::BuilderTy &Builder,
return Builder.CreateICmp(Pred, And, Op);
}
-// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
-// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
-Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
- ICmpInst *RHS,
- Instruction *CxtI,
- bool IsAnd,
- bool IsLogical) {
- CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
- if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
- return nullptr;
-
- if (!match(LHS->getOperand(1), m_Zero()) ||
- !match(RHS->getOperand(1), m_Zero()))
- return nullptr;
-
- Value *L1, *L2, *R1, *R2;
- if (match(LHS->getOperand(0), m_And(m_Value(L1), m_Value(L2))) &&
- match(RHS->getOperand(0), m_And(m_Value(R1), m_Value(R2)))) {
- if (L1 == R2 || L2 == R2)
- std::swap(R1, R2);
- if (L2 == R1)
- std::swap(L1, L2);
-
- if (L1 == R1 &&
- isKnownToBeAPowerOfTwo(L2, false, 0, CxtI) &&
- isKnownToBeAPowerOfTwo(R2, false, 0, CxtI)) {
- // If this is a logical and/or, then we must prevent propagation of a
- // poison value from the RHS by inserting freeze.
- if (IsLogical)
- R2 = Builder.CreateFreeze(R2);
- Value *Mask = Builder.CreateOr(L2, R2);
- Value *Masked = Builder.CreateAnd(L1, Mask);
- auto NewPred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
- return Builder.CreateICmp(NewPred, Masked, Mask);
- }
- }
-
- return nullptr;
-}
-
/// General pattern:
/// X & Y
///
@@ -1431,8 +1405,7 @@ static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS,
return nullptr;
return Builder.CreateFCmpFMF(FCmpInst::getOrderedPredicate(PredR), RHS0, RHS1,
- LHS->getFastMathFlags() &
- RHS->getFastMathFlags());
+ FMFSource::intersect(LHS, RHS));
}
Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
@@ -1469,7 +1442,7 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
// Intersect the fast math flags.
// TODO: We can union the fast math flags unless this is a logical select.
return getFCmpValue(NewPred, LHS0, LHS1, Builder,
- LHS->getFastMathFlags() & RHS->getFastMathFlags());
+ FMFSource::intersect(LHS, RHS));
}
// This transform is not valid for a logical select.
@@ -1486,8 +1459,8 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
// Ignore the constants because they are obviously not NANs:
// (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y)
// (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y)
- return Builder.CreateFCmpFMF(
- PredL, LHS0, RHS0, LHS->getFastMathFlags() & RHS->getFastMathFlags());
+ return Builder.CreateFCmpFMF(PredL, LHS0, RHS0,
+ FMFSource::intersect(LHS, RHS));
}
}
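FMFSource::intersect replaces the explicit `LHS->getFastMathFlags() & RHS->getFastMathFlags()` pattern; the result carries only the fast-math flags common to both instructions. A toy model of the intersection semantics, with flags as a bitmask (assumed behavior sketch, not the LLVM class):

    #include <cstdio>

    int main() {
      // Intersection keeps a flag only if both operands carry it, modeled
      // here as a bitwise AND over a flag mask.
      enum : unsigned { NNan = 1, NInf = 2, Reassoc = 4 };
      unsigned LHSFlags = NNan | Reassoc;
      unsigned RHSFlags = NNan | NInf;
      unsigned Common = LHSFlags & RHSFlags; // only nnan survives
      std::printf("common flags: %#x\n", Common);
    }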
@@ -3327,12 +3300,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
bool IsLogical) {
const SimplifyQuery Q = SQ.getWithInstruction(&I);
- // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
- // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
- // if K1 and K2 are a one-bit mask.
- if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &I, IsAnd, IsLogical))
- return V;
-
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
@@ -3359,7 +3326,7 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// handle (roughly):
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder, Q))
return V;
if (Value *V =
@@ -4964,8 +4931,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
// (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants.
if (I.getType()->isIntOrIntVectorTy(1) &&
- match(Op0, m_OneUse(m_LogicalAnd(m_Value(A), m_Value(B)))) &&
- match(Op1, m_OneUse(m_LogicalOr(m_Value(C), m_Value(D))))) {
+ match(&I, m_c_Xor(m_OneUse(m_LogicalAnd(m_Value(A), m_Value(B))),
+ m_OneUse(m_LogicalOr(m_Value(C), m_Value(D)))))) {
bool NeedFreeze = isa<SelectInst>(Op0) && isa<SelectInst>(Op1) && B == D;
if (B == C || B == D)
std::swap(A, B);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5494c70..c55c40c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2674,9 +2674,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// copysign Mag, (copysign ?, X) --> copysign Mag, X
Value *X;
if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) {
- Value *CopySign = Builder.CreateCopySign(
- Mag, X,
- II->getFastMathFlags() & cast<Instruction>(Sign)->getFastMathFlags());
+ Value *CopySign =
+ Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
return replaceInstUsesWith(*II, CopySign);
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 8b23583..2e45725 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2485,9 +2485,8 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
// icmp ule i64 (shl X, 32), 8589934592 ->
// icmp ule i32 (trunc X, i32), 2 ->
// icmp ult i32 (trunc X, i32), 3
- if (auto FlippedStrictness =
- InstCombiner::getFlippedStrictnessPredicateAndConstant(
- Pred, ConstantInt::get(ShType->getContext(), C))) {
+ if (auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(
+ Pred, ConstantInt::get(ShType->getContext(), C))) {
CmpPred = FlippedStrictness->first;
RHSC = cast<ConstantInt>(FlippedStrictness->second)->getValue();
}
@@ -3091,12 +3090,12 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
unsigned BW = C.getBitWidth();
std::bitset<4> Table;
auto ComputeTable = [&](bool Op0Val, bool Op1Val) {
- int Res = 0;
+ APInt Res(BW, 0);
if (Op0Val)
- Res += isa<ZExtInst>(Ext0) ? 1 : -1;
+ Res += APInt(BW, isa<ZExtInst>(Ext0) ? 1 : -1, /*isSigned=*/true);
if (Op1Val)
- Res += isa<ZExtInst>(Ext1) ? 1 : -1;
- return ICmpInst::compare(APInt(BW, Res, true), C, Pred);
+ Res += APInt(BW, isa<ZExtInst>(Ext1) ? 1 : -1, /*isSigned=*/true);
+ return ICmpInst::compare(Res, C, Pred);
};
Table[0] = ComputeTable(false, false);
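Accumulating in an APInt of the compare's bit width, rather than in a native int, presumably avoids forming sums that are unrepresentable at narrow widths (e.g. +2 or -2 when BW == 1) and lets each step wrap modulo 2^BW, matching IR semantics. A standalone model of the 1-bit case:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // With BW == 1, the native-int sum (+1) + (+1) == 2 is not
      // representable as a 1-bit signed value, but accumulating modulo
      // 2^BW wraps each step the way i1 arithmetic does.
      const unsigned BW = 1;
      const uint8_t Mask = (1u << BW) - 1;
      uint8_t Res = 0;
      Res = (Res + 1) & Mask; // first zext contributes +1
      Res = (Res + 1) & Mask; // second zext contributes +1
      std::printf("1-bit accumulated result: %u\n", Res); // prints 0
    }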
@@ -3280,8 +3279,7 @@ bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) {
// x sgt C-1 <--> x sge C <--> not(x slt C)
auto FlippedStrictness =
- InstCombiner::getFlippedStrictnessPredicateAndConstant(
- PredB, cast<Constant>(RHS2));
+ getFlippedStrictnessPredicateAndConstant(PredB, cast<Constant>(RHS2));
if (!FlippedStrictness)
return false;
assert(FlippedStrictness->first == ICmpInst::ICMP_SGE &&
@@ -6908,79 +6906,6 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
return nullptr;
}
-std::optional<std::pair<CmpPredicate, Constant *>>
-InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred,
- Constant *C) {
- assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
- "Only for relational integer predicates.");
-
- Type *Type = C->getType();
- bool IsSigned = ICmpInst::isSigned(Pred);
-
- CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
- bool WillIncrement =
- UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
-
- // Check if the constant operand can be safely incremented/decremented
- // without overflowing/underflowing.
- auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
- return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
- };
-
- Constant *SafeReplacementConstant = nullptr;
- if (auto *CI = dyn_cast<ConstantInt>(C)) {
- // Bail out if the constant can't be safely incremented/decremented.
- if (!ConstantIsOk(CI))
- return std::nullopt;
- } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
- unsigned NumElts = FVTy->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = C->getAggregateElement(i);
- if (!Elt)
- return std::nullopt;
-
- if (isa<UndefValue>(Elt))
- continue;
-
- // Bail out if we can't determine if this constant is min/max or if we
- // know that this constant is min/max.
- auto *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI || !ConstantIsOk(CI))
- return std::nullopt;
-
- if (!SafeReplacementConstant)
- SafeReplacementConstant = CI;
- }
- } else if (isa<VectorType>(C->getType())) {
- // Handle scalable splat
- Value *SplatC = C->getSplatValue();
- auto *CI = dyn_cast_or_null<ConstantInt>(SplatC);
- // Bail out if the constant can't be safely incremented/decremented.
- if (!CI || !ConstantIsOk(CI))
- return std::nullopt;
- } else {
- // ConstantExpr?
- return std::nullopt;
- }
-
- // It may not be safe to change a compare predicate in the presence of
- // undefined elements, so replace those elements with the first safe constant
- // that we found.
- // TODO: in case of poison, it is safe; let's replace undefs only.
- if (C->containsUndefOrPoisonElement()) {
- assert(SafeReplacementConstant && "Replacement constant not set");
- C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
- }
-
- CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
-
- // Increment or decrement the constant.
- Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
- Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
-
- return std::make_pair(NewPred, NewC);
-}
-
/// If we have an icmp le or icmp ge instruction with a constant operand, turn
/// it into the appropriate icmp lt or icmp gt instruction. This transform
/// allows them to be folded in visitICmpInst.
@@ -6996,8 +6921,7 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
if (!Op1C)
return nullptr;
- auto FlippedStrictness =
- InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
+ auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
if (!FlippedStrictness)
return nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index b31ae37..f6992119 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -435,9 +435,6 @@ private:
Instruction *
canonicalizeConditionalNegationViaMathToSelect(BinaryOperator &i);
- Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
- Instruction *CxtI, bool IsAnd,
- bool IsLogical = false);
Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D,
bool InvertFalseVal = false);
Value *getSelectCondition(Value *A, Value *B, bool ABIsTheSame);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 272a194..80308bf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -765,33 +765,14 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
LoadInst *NewLI =
new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment);
-
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_range,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_nonnull,
- LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_access_group,
- LLVMContext::MD_noundef,
- };
-
- for (unsigned ID : KnownIDs)
- NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
+ NewLI->copyMetadata(*FirstLI);
// Add all operands to the new PHI and combine TBAA metadata.
for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
BasicBlock *BB = std::get<0>(Incoming);
Value *V = std::get<1>(Incoming);
LoadInst *LI = cast<LoadInst>(V);
- // FIXME: https://github.com/llvm/llvm-project/issues/121495
- // Call combineMetadataForCSE instead, so that an explicit set of KnownIDs
- // doesn't need to be maintained here.
- combineMetadata(NewLI, LI, KnownIDs, true);
+ combineMetadataForCSE(NewLI, LI, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
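combineMetadataForCSE applies the metadata-kind-aware merging rules centrally instead of the hand-maintained KnownIDs list above. A toy model of the general idea, keeping only metadata both loads agree on (the real rules are per-kind, e.g. intersecting ranges; this only shows the "intersect what is safe" shape):

    #include <cstdio>
    #include <map>
    #include <string>

    // Toy model: when two loads are merged, keep only metadata both carry
    // with the same value. Not the actual LLVM per-kind merging logic.
    using MD = std::map<std::string, int>;

    static MD combineForCSE(const MD &A, const MD &B) {
      MD Out;
      for (const auto &[Kind, Val] : A) {
        auto It = B.find(Kind);
        if (It != B.end() && It->second == Val)
          Out.emplace(Kind, Val);
      }
      return Out;
    }

    int main() {
      MD Load1{{"align", 8}, {"nonnull", 1}}, Load2{{"align", 8}};
      for (const auto &[K, V] : combineForCSE(Load1, Load2))
        std::printf("%s = %d\n", K.c_str(), V); // only "align = 8" survives
    }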
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7fd91c7..1eca177 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1689,8 +1689,7 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
return nullptr;
// Check the constant we'd have with flipped-strictness predicate.
- auto FlippedStrictness =
- InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, C0);
+ auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, C0);
if (!FlippedStrictness)
return nullptr;
@@ -1970,8 +1969,7 @@ static Value *foldSelectWithConstOpToBinOp(ICmpInst *Cmp, Value *TrueVal,
Value *RHS;
SelectPatternFlavor SPF;
const DataLayout &DL = BOp->getDataLayout();
- auto Flipped =
- InstCombiner::getFlippedStrictnessPredicateAndConstant(Predicate, C1);
+ auto Flipped = getFlippedStrictnessPredicateAndConstant(Predicate, C1);
if (C3 == ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL)) {
SPF = getSelectPattern(Predicate).Flavor;
@@ -2823,9 +2821,9 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
// %cnd = icmp slt i32 %rem, 0
// %add = add i32 %rem, %n
// %sel = select i1 %cnd, i32 %add, i32 %rem
- if (match(TrueVal, m_Add(m_Specific(RemRes), m_Value(Remainder))) &&
+ if (match(TrueVal, m_c_Add(m_Specific(RemRes), m_Value(Remainder))) &&
match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) &&
- IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true) &&
+ IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero=*/true) &&
FalseVal == RemRes)
return FoldToBitwiseAnd(Remainder);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f63de1f..2fb60ef 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -939,12 +939,11 @@ Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
return nullptr;
if (!match(I.getOperand(1 - ShOpnum),
- m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
+ m_c_BinOp(m_CombineAnd(
+ m_OneUse(m_Shift(m_Value(X), m_Specific(Shift))),
+ m_Value(ShiftedX)),
+ m_Value(Mask))))
return nullptr;
-
- if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
- return nullptr;
-
// Make sure we are matching instruction shifts and not ConstantExpr
auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
auto *IX = dyn_cast<Instruction>(ShiftedX);
@@ -1822,12 +1821,29 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
continue;
}
- // If the only use of phi is comparing it with a constant then we can
- // put this comparison in the incoming BB directly after a ucmp/scmp call
- // because we know that it will simplify to a single icmp.
- const APInt *Ignored;
- if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUser() &&
- match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
+ // Handle some cases that can't be fully simplified, but where we know that
+ // the two instructions will fold into one.
+ auto WillFold = [&]() {
+ if (!InVal->hasOneUser())
+ return false;
+
+ // icmp of ucmp/scmp with constant will fold to icmp.
+ const APInt *Ignored;
+ if (isa<CmpIntrinsic>(InVal) &&
+ match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored))))
+ return true;
+
+ // icmp eq zext(bool), 0 will fold to !bool.
+ if (isa<ZExtInst>(InVal) &&
+ cast<ZExtInst>(InVal)->getSrcTy()->isIntOrIntVectorTy(1) &&
+ match(&I,
+ m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(PN), m_Zero())))
+ return true;
+
+ return false;
+ };
+
+ if (WillFold()) {
OpsToMoveUseToIncomingBB.push_back(i);
NewPhiValues.push_back(nullptr);
continue;
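The second WillFold case relies on `icmp eq (zext i1 %b to iN), 0` folding to `xor %b, true`, so sinking the icmp into the incoming block still nets a single instruction. A quick standalone check of that equivalence:

    #include <cassert>

    int main() {
      // "icmp eq (zext i1 %b to iN), 0" is just "!%b" (i.e. xor %b, true),
      // so the sunk icmp folds away against the zext in the incoming block.
      for (bool B : {false, true}) {
        unsigned Zext = B ? 1u : 0u; // zext i1 %b to i32
        bool Icmp = (Zext == 0);     // icmp eq %zext, 0
        assert(Icmp == !B);          // equivalent to xor %b, true
      }
      return 0;
    }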
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 530061e..2031728 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -192,7 +192,7 @@ static cl::opt<bool>
cl::Hidden);
static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
- cl::desc("Hot percentile cuttoff."));
+ cl::desc("Hot percentile cutoff."));
static cl::opt<float>
ClRandomSkipRate("hwasan-random-rate",
diff --git a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp
index 2418030..f27798c 100644
--- a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp
+++ b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
static cl::opt<int>
HotPercentileCutoff("lower-allow-check-percentile-cutoff-hot",
- cl::desc("Hot percentile cuttoff."));
+ cl::desc("Hot percentile cutoff."));
static cl::opt<float>
RandomRate("lower-allow-check-random-rate",
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 471086c..db4d62e 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -158,11 +158,11 @@ STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
// Command line option to specify the file to read profile from. This is
// mainly used for testing.
-static cl::opt<std::string>
- PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
- cl::value_desc("filename"),
- cl::desc("Specify the path of profile data file. This is"
- "mainly for test purpose."));
+static cl::opt<std::string> PGOTestProfileFile(
+ "pgo-test-profile-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("Specify the path of profile data file. This is "
+ "mainly for test purpose."));
static cl::opt<std::string> PGOTestProfileRemappingFile(
"pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
cl::value_desc("filename"),
@@ -186,7 +186,7 @@ static cl::opt<unsigned> MaxNumAnnotations(
// to write to the metadata for a single memop intrinsic.
static cl::opt<unsigned> MaxNumMemOPAnnotations(
"memop-max-annotations", cl::init(4), cl::Hidden,
- cl::desc("Max number of preicise value annotations for a single memop"
+ cl::desc("Max number of precise value annotations for a single memop"
"intrinsic"));
// Command line option to control appending FunctionHash to the name of a COMDAT
@@ -291,13 +291,13 @@ static cl::opt<bool> PGOVerifyHotBFI(
cl::desc("Print out the non-match BFI count if a hot raw profile count "
"becomes non-hot, or a cold raw profile count becomes hot. "
"The print is enabled under -Rpass-analysis=pgo, or "
- "internal option -pass-remakrs-analysis=pgo."));
+ "internal option -pass-remarks-analysis=pgo."));
static cl::opt<bool> PGOVerifyBFI(
"pgo-verify-bfi", cl::init(false), cl::Hidden,
cl::desc("Print out mismatched BFI counts after setting profile metadata "
"The print is enabled under -Rpass-analysis=pgo, or "
- "internal option -pass-remakrs-analysis=pgo."));
+ "internal option -pass-remarks-analysis=pgo."));
static cl::opt<unsigned> PGOVerifyBFIRatio(
"pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 1961095..9cd81f3 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -70,7 +70,7 @@ namespace {
/// violations.
struct TypeSanitizer {
TypeSanitizer(Module &M);
- bool run(Function &F, const TargetLibraryInfo &TLI);
+ bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
void instrumentGlobals(Module &M);
private:
@@ -510,7 +510,8 @@ void collectMemAccessInfo(
}
}
-bool TypeSanitizer::run(Function &F, const TargetLibraryInfo &TLI) {
+bool TypeSanitizer::sanitizeFunction(Function &F,
+ const TargetLibraryInfo &TLI) {
// This is required to prevent instrumenting call to __tysan_init from within
// the module constructor.
if (&F == TysanCtorFunction.getCallee() || &F == TysanGlobalsSetTypeFunction)
@@ -876,15 +877,8 @@ bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
return true;
}
-PreservedAnalyses TypeSanitizerPass::run(Function &F,
- FunctionAnalysisManager &FAM) {
- TypeSanitizer TySan(*F.getParent());
- TySan.run(F, FAM.getResult<TargetLibraryAnalysis>(F));
- return PreservedAnalyses::none();
-}
-
-PreservedAnalyses ModuleTypeSanitizerPass::run(Module &M,
- ModuleAnalysisManager &AM) {
+PreservedAnalyses TypeSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
Function *TysanCtorFunction;
std::tie(TysanCtorFunction, std::ignore) =
createSanitizerCtorAndInitFunctions(M, kTysanModuleCtorName,
@@ -894,5 +888,12 @@ PreservedAnalyses ModuleTypeSanitizerPass::run(Module &M,
TypeSanitizer TySan(M);
TySan.instrumentGlobals(M);
appendToGlobalCtors(M, TysanCtorFunction, 0);
+
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ for (Function &F : M) {
+ const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ TySan.sanitizeFunction(F, TLI);
+ }
+
return PreservedAnalyses::none();
}
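TypeSanitizer now runs as a module pass and reaches per-function analyses through the module/function proxy, as the hunk above shows. A minimal sketch of the same new-pass-manager pattern (ExamplePass is hypothetical; pass registration boilerplate is omitted):

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"

    using namespace llvm;

    struct ExamplePass : PassInfoMixin<ExamplePass> {
      PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) {
        // Fetch the FunctionAnalysisManager through the module proxy, then
        // query per-function analyses while iterating the module.
        auto &FAM =
            MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
        for (Function &F : M) {
          if (F.isDeclaration())
            continue;
          const TargetLibraryInfo &TLI =
              FAM.getResult<TargetLibraryAnalysis>(F);
          (void)TLI; // per-function instrumentation would go here
        }
        return PreservedAnalyses::none();
      }
    };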
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ba1c224..3c82eed 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -128,7 +128,7 @@ static cl::opt<bool, true>
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
- cl::desc("Use loop idiom recognition code size heuristics when compiling"
+ cl::desc("Use loop idiom recognition code size heuristics when compiling "
"with -Os/-Oz"),
cl::init(true), cl::Hidden);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 260cc72..0903488 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -104,7 +104,7 @@ static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
- cl::desc("Don't allow loop unrolling to simulate more than this number of"
+ cl::desc("Don't allow loop unrolling to simulate more than this number of "
"iterations when checking full unroll profitability"));
static cl::opt<unsigned> UnrollCount(
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index f58dcb5..6e91c4f 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -95,7 +95,7 @@ static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
/// invariant instructions in a loop.
static cl::opt<float>
LVInvarThreshold("licm-versioning-invariant-threshold",
- cl::desc("LoopVersioningLICM's minimum allowed percentage"
+ cl::desc("LoopVersioningLICM's minimum allowed percentage "
"of possible invariant instructions per loop"),
cl::init(25), cl::Hidden);
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 1d4f561..b499ef8 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
namespace llvm {
cl::opt<bool> ShouldPreserveAllAttributes(
"assume-preserve-all", cl::init(false), cl::Hidden,
- cl::desc("enable preservation of all attrbitues. even those that are "
- "unlikely to be usefull"));
+ cl::desc("enable preservation of all attributes. even those that are "
+ "unlikely to be useful"));
cl::opt<bool> EnableKnowledgeRetention(
"enable-knowledge-retention", cl::init(false), cl::Hidden,
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index d829864..b3f9f76 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -778,7 +778,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops",
- false, true)
+ false, false)
// Publicly exposed interface to pass...
char &llvm::LoopSimplifyID = LoopSimplify::ID;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 03dc6c1..e367b01 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -96,8 +96,9 @@ using namespace PatternMatch;
cl::opt<bool> llvm::RequireAndPreserveDomTree(
"simplifycfg-require-and-preserve-domtree", cl::Hidden,
- cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
- "into preserving DomTree,"));
+ cl::desc(
+ "Temporary development switch used to gradually uplift SimplifyCFG "
+ "into preserving DomTree,"));
// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
@@ -126,7 +127,7 @@ static cl::opt<bool> HoistLoadsStoresWithCondFaulting(
static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
- cl::desc("Control the maximal conditonal load/store that we are willing "
+ cl::desc("Control the maximal conditional load/store that we are willing "
"to speculatively execute to eliminate conditional branch "
"(default = 6)"));
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 02ec1d5..9e81573 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -324,6 +324,11 @@ private:
Instruction *ChainElem, Instruction *ChainBegin,
const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);
+ /// Merges the equivalence classes if they have underlying objects that differ
+ /// by one level of indirection (i.e., one is a getelementptr and the other is
+ /// the base pointer in that getelementptr).
+ void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;
+
/// Collects loads and stores grouped by "equivalence class", where:
/// - all elements in an eq class are a load or all are a store,
/// - they all load/store the same element size (it's OK to have e.g. i8 and
@@ -1305,6 +1310,119 @@ std::optional<APInt> Vectorizer::getConstantOffsetSelects(
return std::nullopt;
}
+void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
+ if (EQClasses.size() < 2) // There is nothing to merge.
+ return;
+
+ // The reduced key has all elements of the ECClassKey except the underlying
+ // object. Check that EqClassKey has 4 elements and define the reduced key.
+ static_assert(std::tuple_size_v<EqClassKey> == 4,
+ "EqClassKey has changed - EqClassReducedKey needs changes too");
+ using EqClassReducedKey =
+ std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
+ std::tuple_element_t<2, EqClassKey> /* Element size */,
+ std::tuple_element_t<3, EqClassKey> /* IsLoad; */>;
+ using ECReducedKeyToUnderlyingObjectMap =
+ MapVector<EqClassReducedKey,
+ SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;
+
+ // Form a map from the reduced key (without the underlying object) to the
+ // underlying objects: 1 reduced key to many underlying objects, to form
+ // groups of potentially merge-able equivalence classes.
+ ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
+ bool FoundPotentiallyOptimizableEC = false;
+ for (const auto &EC : EQClasses) {
+ const auto &Key = EC.first;
+ EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
+ std::get<3>(Key)};
+ RedKeyToUOMap[RedKey].insert(std::get<0>(Key));
+ if (RedKeyToUOMap[RedKey].size() > 1)
+ FoundPotentiallyOptimizableEC = true;
+ }
+ if (!FoundPotentiallyOptimizableEC)
+ return;
+
+ LLVM_DEBUG({
+ dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
+ for (const auto &EC : EQClasses) {
+ dbgs() << " Key: {" << EC.first << "}\n";
+ for (const auto &Inst : EC.second)
+ dbgs() << " Inst: " << *Inst << '\n';
+ }
+ });
+ LLVM_DEBUG({
+ dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
+ for (const auto &RedKeyToUO : RedKeyToUOMap) {
+ dbgs() << " Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
+ << std::get<1>(RedKeyToUO.first) << ", "
+ << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
+ << RedKeyToUO.second.size() << " underlying objects:\n";
+ for (auto UObject : RedKeyToUO.second)
+ dbgs() << " " << *UObject << '\n';
+ }
+ });
+
+ using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
+
+ // Compute the ultimate targets for a set of underlying objects.
+ auto GetUltimateTargets =
+ [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
+ UObjectToUObjectMap IndirectionMap;
+ for (const auto *UObject : UObjects) {
+ const unsigned MaxLookupDepth = 1; // look for 1-level indirections only
+ const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
+ if (UltimateTarget != UObject)
+ IndirectionMap[UObject] = UltimateTarget;
+ }
+ UObjectToUObjectMap UltimateTargetsMap;
+ for (const auto *UObject : UObjects) {
+ auto Target = UObject;
+ auto It = IndirectionMap.find(Target);
+ for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
+ Target = It->second;
+ UltimateTargetsMap[UObject] = Target;
+ }
+ return UltimateTargetsMap;
+ };
+
+ // For each item in RedKeyToUOMap, if it has more than one underlying object,
+ // try to merge the equivalence classes.
+ for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
+ if (UObjects.size() < 2)
+ continue;
+ auto UTMap = GetUltimateTargets(UObjects);
+ for (const auto &[UObject, UltimateTarget] : UTMap) {
+ if (UObject == UltimateTarget)
+ continue;
+
+ EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
+ std::get<2>(RedKey)};
+ EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
+ std::get<2>(RedKey)};
+ // The entry for KeyFrom is guaranteed to exist, unlike KeyTo. Thus,
+ // request the reference to the instructions vector for KeyTo first.
+ const auto &VecTo = EQClasses[KeyTo];
+ const auto &VecFrom = EQClasses[KeyFrom];
+ SmallVector<Instruction *, 8> MergedVec;
+ std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
+ std::back_inserter(MergedVec),
+ [](Instruction *A, Instruction *B) {
+ return A && B && A->comesBefore(B);
+ });
+ EQClasses[KeyTo] = std::move(MergedVec);
+ EQClasses.erase(KeyFrom);
+ }
+ }
+ LLVM_DEBUG({
+ dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
+ for (const auto &EC : EQClasses) {
+ dbgs() << " Key: {" << EC.first << "}\n";
+ for (const auto &Inst : EC.second)
+ dbgs() << " Inst: " << *Inst << '\n';
+ }
+ });
+}
+
EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
BasicBlock::iterator End) {
@@ -1377,6 +1495,7 @@ Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
.emplace_back(&I);
}
+ mergeEquivalenceClasses(Ret);
return Ret;
}
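GetUltimateTargets chases the one-level indirection map to a fixpoint, so a chain like p2 -> p1 -> base maps both p2 and p1 to base, and their equivalence classes merge into base's class. A standalone sketch of the chain-following loop (hypothetical object names):

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
      // The indirection map records one-level getUnderlyingObject results;
      // chasing it to a fixpoint maps p2 -> p1 -> base down to base.
      std::map<std::string, std::string> Indirection = {{"p1", "base"},
                                                        {"p2", "p1"}};
      for (std::string Obj : {"base", "p1", "p2"}) {
        std::string Target = Obj;
        for (auto It = Indirection.find(Target); It != Indirection.end();
             It = Indirection.find(Target))
          Target = It->second;
        std::printf("%s -> %s\n", Obj.c_str(), Target.c_str());
      }
    }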
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e0f629e..47866da 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8354,17 +8354,22 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
VPSingleDefRecipe *VectorPtr;
- if (Reverse)
+ if (Reverse) {
+ // When folding the tail, we may compute an address that we don't compute
+ // in the original scalar loop, and it may not be inbounds. Drop Inbounds
+ // in that case.
+ GEPNoWrapFlags Flags =
+ (CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
+ ? GEPNoWrapFlags::none()
+ : GEPNoWrapFlags::inBounds();
VectorPtr = new VPReverseVectorPointerRecipe(
- Ptr, &Plan.getVF(), getLoadStoreType(I),
- GEP && GEP->isInBounds() ? GEPNoWrapFlags::inBounds()
- : GEPNoWrapFlags::none(),
- I->getDebugLoc());
- else
+ Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
+ } else {
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
GEP ? GEP->getNoWrapFlags()
: GEPNoWrapFlags::none(),
I->getDebugLoc());
+ }
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c4582df..36fed89 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -104,6 +104,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
using namespace slpvectorizer;
+using namespace std::placeholders;
#define SV_NAME "slp-vectorizer"
#define DEBUG_TYPE "SLP"
@@ -4955,6 +4956,37 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
}
+/// Correctly creates insert_subvector, checking that the index is a multiple
+/// of the subvector length. Otherwise, generates a shuffle using \p Generator
+/// or the default shuffle.
+static Value *createInsertVector(
+ IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index,
+ function_ref<Value *(Value *, Value *, ArrayRef<int>)> Generator = {}) {
+ const unsigned SubVecVF = getNumElements(V->getType());
+ if (Index % SubVecVF == 0) {
+ Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V,
+ Builder.getInt64(Index));
+ } else {
+ // Create shuffle, insertvector requires that index is multiple of
+ // the subvector length.
+ const unsigned VecVF = getNumElements(Vec->getType());
+ SmallVector<int> Mask(VecVF, PoisonMaskElem);
+ std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0);
+ for (unsigned I : seq<unsigned>(SubVecVF))
+ Mask[I + Index] = I + VecVF;
+ if (Generator) {
+ Vec = Generator(Vec, V, Mask);
+ } else {
+ // 1. Resize V to the size of Vec.
+ SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
+ std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
+ V = Builder.CreateShuffleVector(V, ResizeMask);
+ Vec = Builder.CreateShuffleVector(Vec, V, Mask);
+ }
+ }
+ return Vec;
+}
+
BoUpSLP::LoadsState
BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
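When Index is not a multiple of the subvector length, createInsertVector falls back to a shuffle whose mask keeps the first Index lanes of Vec and then selects the SubVecVF lanes of V (numbered after Vec's lanes), leaving the rest poison. A standalone computation of that mask for VecVF = 8, SubVecVF = 4, Index = 2:

    #include <cstdio>
    #include <vector>

    int main() {
      // Identity for the first Index lanes of Vec, then the SubVecVF lanes
      // of V (numbered after Vec's lanes); remaining lanes stay poison (-1).
      const unsigned VecVF = 8, SubVecVF = 4, Index = 2;
      std::vector<int> Mask(VecVF, -1);
      for (unsigned I = 0; I < Index; ++I)
        Mask[I] = I;
      for (unsigned I = 0; I < SubVecVF; ++I)
        Mask[I + Index] = I + VecVF;
      for (int M : Mask)
        std::printf("%d ", M); // 0 1 8 9 10 11 -1 -1
      std::printf("\n");
    }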
@@ -5355,11 +5387,10 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL,
SmallPtrSet<Value *, 13> SecondPointers;
Value *P1 = Ptr1;
Value *P2 = Ptr2;
- if (P1 == P2)
- return false;
unsigned Depth = 0;
- while (!FirstPointers.contains(P2) && !SecondPointers.contains(P1) &&
- Depth <= RecursionMaxDepth) {
+ while (!FirstPointers.contains(P2) && !SecondPointers.contains(P1)) {
+ if (P1 == P2 || Depth > RecursionMaxDepth)
+ return false;
FirstPointers.insert(P1);
SecondPointers.insert(P2);
P1 = getUnderlyingObject(P1, /*MaxLookup=*/1);
@@ -13884,9 +13915,8 @@ Value *BoUpSLP::gather(
Instruction *InsElt;
if (auto *VecTy = dyn_cast<FixedVectorType>(Scalar->getType())) {
assert(SLPReVec && "FixedVectorType is not expected.");
- Vec = InsElt = Builder.CreateInsertVector(
- Vec->getType(), Vec, Scalar,
- Builder.getInt64(Pos * VecTy->getNumElements()));
+ Vec = InsElt = cast<Instruction>(createInsertVector(
+ Builder, Vec, Scalar, Pos * getNumElements(VecTy)));
auto *II = dyn_cast<IntrinsicInst>(InsElt);
if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
return Vec;
@@ -14486,23 +14516,10 @@ public:
V, SimplifyQuery(*R.DL));
}));
unsigned InsertionIndex = Idx * ScalarTyNumElements;
- const unsigned SubVecVF =
- cast<FixedVectorType>(V->getType())->getNumElements();
- if (InsertionIndex % SubVecVF == 0) {
- Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V,
- Builder.getInt64(InsertionIndex));
- } else {
- // Create shuffle, insertvector requires that index is multiple of
- // the subvectors length.
- const unsigned VecVF =
- cast<FixedVectorType>(Vec->getType())->getNumElements();
- SmallVector<int> Mask(VecVF, PoisonMaskElem);
- std::iota(Mask.begin(), Mask.end(), 0);
- for (unsigned I : seq<unsigned>(
- InsertionIndex, (Idx + SubVecVF) * ScalarTyNumElements))
- Mask[I] = I - Idx + VecVF;
- Vec = createShuffle(Vec, V, Mask);
- }
+ Vec = createInsertVector(
+ Builder, Vec, V, InsertionIndex,
+ std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2,
+ _3));
if (!CommonMask.empty()) {
std::iota(
std::next(CommonMask.begin(), InsertionIndex),
@@ -17748,7 +17765,6 @@ bool BoUpSLP::collectValuesToDemote(
BitWidth = std::max(BitWidth, BitWidth1);
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
};
- using namespace std::placeholders;
auto FinalAnalysis = [&]() {
if (!IsProfitableToDemote)
return false;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9d7bf97..cfbb4ad 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1351,6 +1351,9 @@ public:
}
}
+ /// Returns true if the underlying opcode may read from or write to memory.
+ bool opcodeMayReadOrWriteFromMemory() const;
+
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 76336ae..e54df8bd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -51,24 +51,7 @@ extern cl::opt<unsigned> ForceTargetInstructionCost;
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPInstructionSC:
- if (Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode()))
- return false;
- switch (cast<VPInstruction>(this)->getOpcode()) {
- case Instruction::Or:
- case Instruction::ICmp:
- case Instruction::Select:
- case VPInstruction::AnyOf:
- case VPInstruction::Not:
- case VPInstruction::CalculateTripCountMinusVF:
- case VPInstruction::CanonicalIVIncrementForPart:
- case VPInstruction::ExtractFromEnd:
- case VPInstruction::FirstOrderRecurrenceSplice:
- case VPInstruction::LogicalAnd:
- case VPInstruction::PtrAdd:
- return false;
- default:
- return true;
- }
+ return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
case VPInterleaveSC:
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
case VPWidenStoreEVLSC:
@@ -115,6 +98,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
bool VPRecipeBase::mayReadFromMemory() const {
switch (getVPDefID()) {
+ case VPInstructionSC:
+ return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
return true;
@@ -707,6 +692,26 @@ void VPInstruction::execute(VPTransformState &State) {
/*IsScalar*/ GeneratesPerFirstLaneOnly);
}
+bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
+ if (Instruction::isBinaryOp(getOpcode()))
+ return false;
+ switch (getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::Select:
+ case VPInstruction::AnyOf:
+ case VPInstruction::CalculateTripCountMinusVF:
+ case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::ExtractFromEnd:
+ case VPInstruction::FirstOrderRecurrenceSplice:
+ case VPInstruction::LogicalAnd:
+ case VPInstruction::Not:
+ case VPInstruction::PtrAdd:
+ return false;
+ default:
+ return true;
+ }
+}
+
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 9657770..7779442 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -49,7 +49,8 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) {
return all_of(GEP->operands(), isUniformAfterVectorization);
if (auto *VPI = dyn_cast<VPInstruction>(Def))
return VPI->isSingleScalar() || VPI->isVectorToScalar();
- return false;
+ // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
+ return isa<VPExpandSCEVRecipe>(Def);
}
/// Return true if \p V is a header mask in \p Plan.
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ea53a1a..1a669b5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -705,7 +705,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
InstructionCost NewCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy, CostKind) +
- TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, CostKind);
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTy, Mask,
+ CostKind);
bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
// If the lengths of the two vectors are not equal,
@@ -2022,9 +2023,7 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
if (Match1)
InnerCost1 = TTI.getInstructionCost(cast<Instruction>(OuterV1), CostKind);
- InstructionCost OuterCost = TTI.getShuffleCost(
- TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy, OuterMask, CostKind,
- 0, nullptr, {OuterV0, OuterV1}, &I);
+ InstructionCost OuterCost = TTI.getInstructionCost(&I, CostKind);
InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;