Diffstat (limited to 'llvm/lib/Analysis')
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 179
-rw-r--r--  llvm/lib/Analysis/DXILResource.cpp | 7
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp | 17
-rw-r--r--  llvm/lib/Analysis/IVDescriptors.cpp | 26
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 2
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp | 23
-rw-r--r--  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 44
-rw-r--r--  llvm/lib/Analysis/MemoryProfileInfo.cpp | 41
-rw-r--r--  llvm/lib/Analysis/ProfileSummaryInfo.cpp | 14
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 21
-rw-r--r--  llvm/lib/Analysis/StackLifetime.cpp | 5
-rw-r--r--  llvm/lib/Analysis/TargetLibraryInfo.cpp | 28
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp | 2
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 18
14 files changed, 351 insertions, 76 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9c1c2c6..759c553 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (!AllConstantInt)
break;
- // TODO: Try to intersect two inrange attributes?
- if (!InRange) {
- InRange = GEP->getInRange();
- if (InRange)
- // Adjust inrange by offset until now.
- InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+ // Adjust inrange offset and intersect inrange attributes
+ if (auto GEPRange = GEP->getInRange()) {
+ auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+ InRange =
+ InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
}
Ptr = cast<Constant>(GEP->getOperand(0));
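
A reviewer's note on the intersection above: each inner GEP's inrange is expressed relative to its own base, so it must be shifted by the offset accumulated so far before it can be intersected with any range already collected. The sketch below models this with plain signed intervals; Range, subtractOffset, and intersect are illustrative stand-ins, not the llvm::ConstantRange API, and wrapping is ignored.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>

// Simplified stand-in for a constant range [Lo, Hi).
struct Range {
  int64_t Lo, Hi;
};

// Shift a range down by the offset folded in so far, mirroring
// InRange->sextOrTrunc(BitWidth).subtract(Offset).
static Range subtractOffset(Range R, int64_t Offset) {
  return {R.Lo - Offset, R.Hi - Offset};
}

// Intersect two ranges, mirroring InRange->intersectWith(AdjustedGEPRange).
static std::optional<Range> intersect(Range A, Range B) {
  Range R{std::max(A.Lo, B.Lo), std::min(A.Hi, B.Hi)};
  if (R.Lo >= R.Hi)
    return std::nullopt; // empty intersection
  return R;
}

int main() {
  std::optional<Range> InRange;  // nothing collected yet
  Range GEPRange{0, 16};         // inrange on the inner GEP, in bytes
  int64_t Offset = 4;            // byte offset accumulated before this GEP

  Range Adjusted = subtractOffset(GEPRange, Offset);
  InRange = InRange ? intersect(*InRange, Adjusted) : Adjusted;
  if (InRange)
    std::cout << "[" << InRange->Lo << ", " << InRange->Hi << ")\n";
}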
@@ -1801,6 +1800,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nvvm_d2ull_rn:
case Intrinsic::nvvm_d2ull_rp:
case Intrinsic::nvvm_d2ull_rz:
+
+ // NVVM math intrinsics:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_ftz_f:
+
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_fabs_ftz:
+
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_ftz_f:
+
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_ftz_f:
+
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
return !Call->isStrictFP();
// Sign operations are actually bitwise operations, they do not raise
@@ -1818,6 +1855,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nearbyint:
case Intrinsic::rint:
case Intrinsic::canonicalize:
+
// Constrained intrinsics can be folded if FP environment is known
// to compiler.
case Intrinsic::experimental_constrained_fma:
@@ -1971,16 +2009,49 @@ static APFloat FTZPreserveSign(const APFloat &V) {
return V;
}
-Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
- Type *Ty) {
+static APFloat FlushToPositiveZero(const APFloat &V) {
+ if (V.isDenormal())
+ return APFloat::getZero(V.getSemantics(), false);
+ return V;
+}
+
+static APFloat FlushWithDenormKind(const APFloat &V,
+ DenormalMode::DenormalModeKind DenormKind) {
+ assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
+ DenormKind != DenormalMode::DenormalModeKind::Dynamic);
+ switch (DenormKind) {
+ case DenormalMode::DenormalModeKind::IEEE:
+ return V;
+ case DenormalMode::DenormalModeKind::PreserveSign:
+ return FTZPreserveSign(V);
+ case DenormalMode::DenormalModeKind::PositiveZero:
+ return FlushToPositiveZero(V);
+ default:
+ llvm_unreachable("Invalid denormal mode!");
+ }
+}
+
+Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
+ DenormalMode DenormMode = DenormalMode::getIEEE()) {
+ if (!DenormMode.isValid() ||
+ DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
+ DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
+ return nullptr;
+
llvm_fenv_clearexcept();
- double Result = NativeFP(V.convertToDouble());
+ auto Input = FlushWithDenormKind(V, DenormMode.Input);
+ double Result = NativeFP(Input.convertToDouble());
if (llvm_fenv_testexcept()) {
llvm_fenv_clearexcept();
return nullptr;
}
- return GetConstantFoldFPValue(Result, Ty);
+ Constant *Output = GetConstantFoldFPValue(Result, Ty);
+ if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
+ return Output;
+ const auto *CFP = static_cast<ConstantFP *>(Output);
+ const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
+ return ConstantFP::get(Ty->getContext(), Res);
}
#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
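
The flushing helpers introduced above map cleanly onto portable C++. This is a minimal model, not the APFloat-based implementation: it flushes subnormal doubles the way the PreserveSign and PositiveZero DenormalModeKind cases do.

#include <cmath>
#include <cstdio>

// DenormalModeKind::PreserveSign: flush a subnormal to zero, keeping its sign.
static double flushPreserveSign(double V) {
  if (std::fpclassify(V) == FP_SUBNORMAL)
    return std::copysign(0.0, V);
  return V;
}

// DenormalModeKind::PositiveZero: flush a subnormal to +0.0 regardless of sign.
static double flushToPositiveZero(double V) {
  if (std::fpclassify(V) == FP_SUBNORMAL)
    return 0.0;
  return V;
}

int main() {
  double Tiny = -4.9e-324; // a negative subnormal
  std::printf("%g %g\n", flushPreserveSign(Tiny),  // prints -0
              flushToPositiveZero(Tiny));          // prints 0
}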
@@ -2550,6 +2621,94 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFoldFP(atan, APF, Ty);
case Intrinsic::sqrt:
return ConstantFoldFP(sqrt, APF, Ty);
+
+ // NVVM Intrinsics:
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ return ConstantFoldFP(
+ ceil, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_fabs:
+ return ConstantFoldFP(
+ fabs, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ return ConstantFoldFP(
+ floor, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f: {
+ APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
+ bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
+
+ auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
+ APFloat Res = APFloat::getOne(APF.getSemantics());
+ APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
+
+ if (Status == APFloat::opOK || Status == APFloat::opInexact) {
+ if (IsFTZ)
+ Res = FTZPreserveSign(Res);
+ return ConstantFP::get(Ty->getContext(), Res);
+ }
+ return nullptr;
+ }
+
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d: {
+ // Use APFloat implementation instead of native libm call, as some
+ // implementations (e.g. on PPC) do not preserve the sign of negative 0.
+ bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+ auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+ V.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f: {
+ bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+ auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+ if (V.isNegative() || V.isZero() || V.isNaN())
+ return ConstantFP::getZero(Ty);
+ APFloat One = APFloat::getOne(APF.getSemantics());
+ if (V > One)
+ return ConstantFP::get(Ty->getContext(), One);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
+
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ if (APF.isNegative())
+ return nullptr;
+ return ConstantFoldFP(
+ sqrt, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ // AMDGCN Intrinsics:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_sin: {
double V = getValueAsDouble(Op);
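
As a sanity check on the nvvm_saturate folding in this file, here is a scalar model of the clamp, purely illustrative: negative, zero, and NaN inputs map to 0, values above 1 map to 1, everything else passes through.

#include <cmath>
#include <cstdio>

// Scalar model of nvvm.saturate: clamp V to [0.0, 1.0], mapping NaN to 0.0.
static double saturate(double V) {
  if (std::isnan(V) || V <= 0.0)
    return 0.0;
  return V > 1.0 ? 1.0 : V;
}

int main() {
  std::printf("%g %g %g\n", saturate(-2.5), saturate(0.25), saturate(7.0));
  // prints: 0 0.25 1
}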
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 2da6468..1959ab6 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -1079,15 +1079,16 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
// add new space
S = &BS->Spaces.emplace_back(B.Space);
- // the space is full - set flag to report overlapping binding later
- if (S->FreeRanges.empty()) {
+ // The space is full - there are no free slots left, or the rest of the
+ // slots are taken by an unbounded array. Set flag to report overlapping
+ // binding later.
+ if (S->FreeRanges.empty() || S->FreeRanges.back().UpperBound < UINT32_MAX) {
OverlappingBinding = true;
continue;
}
// adjust the last free range lower bound, split it in two, or remove it
BindingRange &LastFreeRange = S->FreeRanges.back();
- assert(LastFreeRange.UpperBound == UINT32_MAX);
if (LastFreeRange.LowerBound == B.LowerBound) {
if (B.UpperBound < UINT32_MAX)
LastFreeRange.LowerBound = B.UpperBound + 1;
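
The new fullness test is worth seeing in isolation. In this sketch, Range and spaceIsFull are simplified stand-ins for BindingRange and the in-tree logic: a space can still take a binding only if its last free range reaches UINT32_MAX; a shorter tail means an unbounded array already claimed the rest.

#include <cstdint>
#include <vector>

// Simplified stand-in for a free slot range within a register space.
struct Range {
  uint32_t LowerBound;
  uint32_t UpperBound;
};

// Mirrors the new check: the space is full when no free ranges remain or
// when the tail of the space was already claimed by an unbounded array.
static bool spaceIsFull(const std::vector<Range> &FreeRanges) {
  return FreeRanges.empty() || FreeRanges.back().UpperBound < UINT32_MAX;
}

int main() {
  std::vector<Range> Free{{0, UINT32_MAX}};
  bool FullBefore = spaceIsFull(Free); // false: whole space still open
  Free.back().UpperBound = 41;         // unbounded array took [42, UINT32_MAX]
  bool FullAfter = spaceIsFull(Free);  // true: no room for another binding
  return FullBefore || !FullAfter;     // returns 0 when both checks behave
}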
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dd9a44b..f1473b2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SrcSubscripts, DstSubscripts))
return false;
+ assert(isLoopInvariant(SrcBase, SrcLoop) &&
+ isLoopInvariant(DstBase, DstLoop) &&
+ "Expected SrcBase and DstBase to be loop invariant");
+
int Size = SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
@@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
SCEVUnionPredicate(Assume, *SE));
}
+  // Even if the base pointers are the same, they may not be loop-invariant.
+  // That could lead to incorrect results, since we're analyzing loop-carried
+  // dependencies. Src and Dst can be in different loops, so we need to check
+  // that the base pointer is invariant in both loops.
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+ if (!isLoopInvariant(SrcBase, SrcLoop) ||
+ !isLoopInvariant(DstBase, DstLoop)) {
+ LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
+ return std::make_unique<Dependence>(Src, Dst,
+ SCEVUnionPredicate(Assume, *SE));
+ }
+
uint64_t EltSize = SrcLoc.Size.toRaw();
const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
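
For intuition about the new bail-out, the following loop nest, an illustrative input rather than code from this patch, has a store whose base pointer changes on every outer iteration; treating Rows[i] as a fixed base when analyzing outer-loop-carried dependencies would be unsound.

#include <cstddef>

// The store's base pointer (Rows[i]) is not invariant in the outer loop, so
// subscript-based dependence reasoning over a single fixed base would be
// unsound there, even though the pointer is invariant in the inner loop.
void scale(double **Rows, std::size_t N, std::size_t M) {
  for (std::size_t i = 0; i < N; ++i) {
    double *Base = Rows[i]; // varies with the outer induction variable
    for (std::size_t j = 0; j < M; ++j)
      Base[j] *= 2.0;
  }
}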
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 39f74be..8be5de3 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
- if (isIntMinMaxRecurrenceKind(Kind) ||
- (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
+ if (isIntMinMaxRecurrenceKind(Kind))
return isMinMaxPattern(I, Kind, Prev);
- else if (isFMulAddIntrinsic(I))
+ if (isFPMinMaxRecurrenceKind(Kind)) {
+ InstDesc Res = isMinMaxPattern(I, Kind, Prev);
+ if (!Res.isRecurrence())
+ return InstDesc(false, I);
+ if (HasRequiredFMF())
+ return Res;
+ // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
+ // intrinsics with extra checks ensuring the vector loop handles only
+ // non-NaN inputs.
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) {
+ assert(Kind == RecurKind::FMax &&
+ "unexpected recurrence kind for maxnum");
+ return InstDesc(I, RecurKind::FMaxNum);
+ }
+ if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) {
+ assert(Kind == RecurKind::FMin &&
+ "unexpected recurrence kind for minnum");
+ return InstDesc(I, RecurKind::FMinNum);
+ }
+ return InstDesc(false, I);
+ }
+ if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
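
For reference, the scalar shape the new FMaxNum kind describes looks like the sketch below, written with std::fmax, which has maxnum-style NaN semantics (a NaN operand is ignored in favor of the other). The extra runtime NaN checks the comment mentions are not modeled here.

#include <cmath>
#include <vector>

// Scalar form of an FMax reduction built from maxnum: a NaN element is
// ignored unless every element is NaN (std::fmax returns the non-NaN side).
double reduceFMax(const std::vector<double> &V) {
  double Max = -INFINITY;
  for (double X : V)
    Max = std::fmax(Max, X);
  return Max;
}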
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
Type *MidTy = CI->getType();
Type *DstTy = Ty;
if (Src->getType() == Ty) {
- auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto FirstOp = CI->getOpcode();
auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f3a32d3..14be385 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -589,11 +589,11 @@ void RuntimePointerChecking::groupChecks(
// dependence. Not grouping the checks for a[i] and a[i + 9000] allows
// us to perform an accurate check in this case.
//
- // The above case requires that we have an UnknownDependence between
- // accesses to the same underlying object. This cannot happen unless
- // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
- // is also false. In this case we will use the fallback path and create
- // separate checking groups for all pointers.
+ // In the above case, we have a non-constant distance and an Unknown
+ // dependence between accesses to the same underlying object, and could retry
+ // with runtime checks. Therefore UseDependencies is false. In this case we
+ // will use the fallback path and create separate checking groups for all
+ // pointers.
// If we don't have the dependency partitions, construct a new
// checking pointer group for each pointer. This is also required
@@ -819,7 +819,7 @@ public:
/// perform dependency checking.
///
/// Note that this can later be cleared if we retry memcheck analysis without
- /// dependency checking (i.e. FoundNonConstantDistanceDependence).
+ /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -896,7 +896,7 @@ private:
///
/// Note that, this is different from isDependencyCheckNeeded. When we retry
/// memcheck analysis without dependency checking
- /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
+ /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
/// cleared while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded = false;
@@ -2079,11 +2079,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
if (StrideAScaled == StrideBScaled)
CommonStride = StrideAScaled;
- // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
- // to consider retrying with runtime checks. Historically, we did not set it
- // when (unscaled) strides were different but there is no inherent reason to.
+ // TODO: Historically, we didn't retry with runtime checks when (unscaled)
+ // strides were different but there is no inherent reason to.
if (!isa<SCEVConstant>(Dist))
- FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;
+ ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
// If distance is a SCEVCouldNotCompute, return Unknown immediately.
if (isa<SCEVCouldNotCompute>(Dist)) {
@@ -2712,7 +2711,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
DepsAreSafe =
DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
- if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
+ if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
// Clear the dependency checks. We assume they are not needed.
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3aa9909..2b0f212 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,33 +983,37 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
- switch (Cache.size() - NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache.back();
- Cache.pop_back();
- MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
- Cache.insert(Entry, Val);
- [[fallthrough]];
+
+  // Nothing to sort with fewer than two entries.
+ if (Cache.size() < 2)
+ return;
+
+ unsigned s = Cache.size() - NumSortedEntries;
+
+ // If the cache is already sorted, don't sort it again.
+ if (s == 0)
+ return;
+
+ // If no entry is sorted, sort the whole cache.
+ if (NumSortedEntries == 0) {
+ llvm::sort(Cache);
+ return;
}
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache.size() != 1) {
+
+  // If the number of unsorted entries is small and the cache is large, using
+  // insertion sort is faster. Use Log2_32 as a quick heuristic to choose the
+  // sort method.
+ if (s < Log2_32(Cache.size())) {
+ while (s > 0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- llvm::upper_bound(Cache, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
Cache.insert(Entry, Val);
+ s--;
}
- break;
- default:
- // Added many values, do a full scale sort.
+ } else {
llvm::sort(Cache);
- break;
}
}
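
The rewritten sort strategy is self-contained enough to sketch outside LLVM. Assuming a vector whose first NumSorted elements are already in order, sortWithSortedPrefix, a hypothetical helper mirroring the patch, inserts the few trailing entries by binary search when that is cheap, and falls back to a full sort past the log2-of-size cutoff.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Sort a vector whose first NumSorted entries are already in order.
static void sortWithSortedPrefix(std::vector<int> &V, std::size_t NumSorted) {
  if (V.size() < 2)
    return;
  std::size_t Unsorted = V.size() - NumSorted;
  if (Unsorted == 0)
    return; // already fully sorted
  if (NumSorted == 0) {
    std::sort(V.begin(), V.end()); // no sorted prefix to exploit
    return;
  }
  // A few unsorted entries in a large vector: binary-search insertion into
  // the sorted prefix beats re-sorting everything.
  if (Unsorted < static_cast<std::size_t>(std::log2(V.size()))) {
    while (Unsorted > 0) {
      int Val = V.back();
      V.pop_back();
      // The sorted region now ends (Unsorted - 1) entries before V.end().
      auto Pos = std::upper_bound(V.begin(), V.end() - (Unsorted - 1), Val);
      V.insert(Pos, Val);
      --Unsorted;
    }
  } else {
    std::sort(V.begin(), V.end());
  }
}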
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index c08024a..b3c8a7d 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -157,6 +157,8 @@ void CallStackTrie::addCallStack(
}
void CallStackTrie::addCallStack(MDNode *MIB) {
+ // Note that we are building this from existing MD_memprof metadata.
+ BuiltFromExistingMetadata = true;
MDNode *StackMD = getMIBStackNode(MIB);
assert(StackMD);
std::vector<uint64_t> CallStack;
@@ -187,8 +189,9 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
AllocationType AllocType,
ArrayRef<ContextTotalSize> ContextSizeInfo,
- const uint64_t MaxColdSize, uint64_t &TotalBytes,
- uint64_t &ColdBytes) {
+ const uint64_t MaxColdSize,
+ bool BuiltFromExistingMetadata,
+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
SmallVector<Metadata *> MIBPayload(
{buildCallstackMetadata(MIBCallStack, Ctx)});
MIBPayload.push_back(
@@ -197,8 +200,9 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
if (ContextSizeInfo.empty()) {
// The profile matcher should have provided context size info if there was a
// MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
- // handle a user-provided percent larger than 100.
- assert(MinCallsiteColdBytePercent >= 100);
+ // handle a user-provided percent larger than 100. However, we may not have
+ // this information if we built the Trie from existing MD_memprof metadata.
+ assert(BuiltFromExistingMetadata || MinCallsiteColdBytePercent >= 100);
return MDNode::get(Ctx, MIBPayload);
}
@@ -252,9 +256,19 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
std::vector<Metadata *> &SavedMIBNodes,
unsigned CallerContextLength,
- uint64_t TotalBytes, uint64_t ColdBytes) {
+ uint64_t TotalBytes, uint64_t ColdBytes,
+ bool BuiltFromExistingMetadata) {
const bool MostlyCold =
- MinCallsiteColdBytePercent < 100 &&
+ // If we have built the Trie from existing MD_memprof metadata, we may or
+ // may not have context size information (in which case ColdBytes and
+  // TotalBytes are 0, which is also not guarded against below). Even if we
+  // do have some context size information from the metadata, we have
+ // already gone through a round of discarding of small non-cold contexts
+ // during matching, and it would be overly aggressive to do it again, and
+ // we also want to maintain the same behavior with and without reporting
+ // of hinted bytes enabled.
+ !BuiltFromExistingMetadata && MinCallsiteColdBytePercent < 100 &&
+ ColdBytes > 0 &&
ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;
// In the simplest case, with pruning disabled, keep all the new MIB nodes.
@@ -386,9 +400,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
if (hasSingleAllocType(Node->AllocTypes)) {
std::vector<ContextTotalSize> ContextSizeInfo;
collectContextSizeInfo(Node, ContextSizeInfo);
- MIBNodes.push_back(
- createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes,
- ContextSizeInfo, MaxColdSize, TotalBytes, ColdBytes));
+ MIBNodes.push_back(createMIBNode(
+ Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo,
+ MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
return true;
}
@@ -416,7 +430,8 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
// Pass in the stack length of the MIB nodes added for the immediate caller,
// which is the current stack length plus 1.
saveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1,
- CallerTotalBytes, CallerColdBytes);
+ CallerTotalBytes, CallerColdBytes,
+ BuiltFromExistingMetadata);
TotalBytes += CallerTotalBytes;
ColdBytes += CallerColdBytes;
@@ -441,9 +456,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
return false;
std::vector<ContextTotalSize> ContextSizeInfo;
collectContextSizeInfo(Node, ContextSizeInfo);
- MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
- ContextSizeInfo, MaxColdSize, TotalBytes,
- ColdBytes));
+ MIBNodes.push_back(createMIBNode(
+ Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize,
+ BuiltFromExistingMetadata, TotalBytes, ColdBytes));
return true;
}
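
The guarded MostlyCold condition reduces to a small predicate. A minimal model, keeping the names from the patch but none of the trie machinery, and using cross-multiplication to avoid division:

#include <cstdint>

// True when at least MinColdPercent percent of TotalBytes are cold. The
// ColdBytes > 0 guard keeps the all-zero case (no context size info) from
// passing, since 0 >= 0 would otherwise classify it as mostly cold.
static bool isMostlyCold(uint64_t ColdBytes, uint64_t TotalBytes,
                         uint64_t MinColdPercent,
                         bool BuiltFromExistingMetadata) {
  return !BuiltFromExistingMetadata && MinColdPercent < 100 &&
         ColdBytes > 0 && ColdBytes * 100 >= MinColdPercent * TotalBytes;
}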
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37..f1c3155 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
ColdCountThreshold =
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
- assert(ColdCountThreshold <= HotCountThreshold &&
- "Cold count threshold cannot exceed hot count threshold!");
+ // When the hot and cold thresholds are identical, we would classify
+ // a count value as both hot and cold since we are doing an inclusive check
+  // (see ::is{Hot|Cold}Count()). To avoid this undesirable overlap, ensure the
+ // thresholds are distinct.
+ if (HotCountThreshold == ColdCountThreshold) {
+ if (ColdCountThreshold > 0)
+ (*ColdCountThreshold)--;
+ else
+ (*HotCountThreshold)++;
+ }
+ assert(ColdCountThreshold < HotCountThreshold &&
+ "Cold count threshold should be less than hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
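
A compact model of the disambiguation step above, assuming plain integers rather than the optional-wrapped thresholds in the pass: when both thresholds land on the same count, nudging one apart keeps the inclusive isHotCount/isColdCount checks from both matching a single value.

#include <cstdint>

// Ensure a count can never be classified as both hot and cold when the
// summary yields identical thresholds (both checks are inclusive).
static void separateThresholds(uint64_t &Hot, uint64_t &Cold) {
  if (Hot == Cold) {
    if (Cold > 0)
      --Cold; // prefer shrinking the cold threshold
    else
      ++Hot; // both are zero: bump the hot threshold instead
  }
}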
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 24adfa3..0990a0d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -11418,8 +11418,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
XNonConstOp = X;
XFlagsPresent = ExpectedFlags;
}
- if (!isa<SCEVConstant>(XConstOp) ||
- (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
+ if (!isa<SCEVConstant>(XConstOp))
return false;
if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
@@ -11428,12 +11427,20 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
YFlagsPresent = ExpectedFlags;
}
- if (!isa<SCEVConstant>(YConstOp) ||
- (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+ if (YNonConstOp != XNonConstOp)
return false;
- if (YNonConstOp != XNonConstOp)
+ if (!isa<SCEVConstant>(YConstOp))
+ return false;
+
+ // When matching ADDs with NUW flags (and unsigned predicates), only the
+ // second ADD (with the larger constant) requires NUW.
+ if ((YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+ return false;
+ if (ExpectedFlags != SCEV::FlagNUW &&
+ (XFlagsPresent & ExpectedFlags) != ExpectedFlags) {
return false;
+ }
OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
@@ -11472,7 +11479,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
std::swap(LHS, RHS);
[[fallthrough]];
case ICmpInst::ICMP_ULE:
- // (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
+ // (X + C1) u<= (X + C2)<nuw> for C1 u<= C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ule(C2))
return true;
@@ -11482,7 +11489,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
std::swap(LHS, RHS);
[[fallthrough]];
case ICmpInst::ICMP_ULT:
- // (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
+ // (X + C1) u< (X + C2)<nuw> if C1 u< C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ult(C2))
return true;
break;
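
The relaxed flag requirement has a short justification: if C1 u<= C2 and X + C2 does not wrap, then X + C1 cannot wrap either, so only the add with the larger constant needs <nuw>. The 8-bit sketch below, purely illustrative, checks that fact exhaustively.

#include <cassert>
#include <cstdint>

int main() {
  // For all X and C1 u<= C2: if X + C2 does not wrap in 8 bits, then
  // X + C1 does not wrap either, and X + C1 u<= X + C2 holds.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C1 = 0; C1 < 256; ++C1)
      for (unsigned C2 = C1; C2 < 256; ++C2)
        if (X + C2 <= 255) {                          // X + C2 has <nuw>
          assert(X + C1 <= 255);                      // implied by C1 u<= C2
          assert(uint8_t(X + C1) <= uint8_t(X + C2)); // u<= holds
        }
  return 0;
}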
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 21f54c7..34a7a04 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -63,10 +63,7 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI,
// markers has the same size and points to the alloca start.
static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
const DataLayout &DL) {
- const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
- if (!AI)
- return nullptr;
-
+ const AllocaInst *AI = cast<AllocaInst>(II.getArgOperand(1));
auto AllocaSize = AI->getAllocationSize(DL);
if (!AllocaSize)
return nullptr;
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e475be2..6e92766 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -875,6 +875,34 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_toascii);
}
+ if (T.isOSFreeBSD()) {
+ TLI.setAvailable(LibFunc_dunder_strtok_r);
+ TLI.setAvailable(LibFunc_memalign);
+ TLI.setAvailable(LibFunc_fputc_unlocked);
+ TLI.setAvailable(LibFunc_fputs_unlocked);
+ TLI.setAvailable(LibFunc_fread_unlocked);
+ TLI.setAvailable(LibFunc_fwrite_unlocked);
+ TLI.setAvailable(LibFunc_getc_unlocked);
+ TLI.setAvailable(LibFunc_getchar_unlocked);
+ TLI.setAvailable(LibFunc_putc_unlocked);
+ TLI.setAvailable(LibFunc_putchar_unlocked);
+
+ TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+ TLI.setUnavailable(LibFunc___kmpc_free_shared);
+ TLI.setUnavailable(LibFunc_dunder_strndup);
+ TLI.setUnavailable(LibFunc_memccpy_chk);
+ TLI.setUnavailable(LibFunc_strlen_chk);
+ TLI.setUnavailable(LibFunc_fmaximum_num);
+ TLI.setUnavailable(LibFunc_fmaximum_numf);
+ TLI.setUnavailable(LibFunc_fmaximum_numl);
+ TLI.setUnavailable(LibFunc_fminimum_num);
+ TLI.setUnavailable(LibFunc_fminimum_numf);
+ TLI.setUnavailable(LibFunc_fminimum_numl);
+ TLI.setUnavailable(LibFunc_roundeven);
+ TLI.setUnavailable(LibFunc_roundevenf);
+ TLI.setUnavailable(LibFunc_roundevenl);
+ }
+
// As currently implemented in clang, NVPTX code has no standard library to
// speak of. Headers provide a standard-ish library implementation, but many
// of the signatures are wrong -- for example, many libm functions are not
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8a470eb..55ba52a 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1423,7 +1423,7 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
return TTIImpl->hasArmWideBranch(Thumb);
}
-uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const {
+APInt TargetTransformInfo::getFeatureMask(const Function &F) const {
return TTIImpl->getFeatureMask(F);
}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 61a322b..af85ce4 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7912,6 +7912,8 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
case Intrinsic::ushl_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::sin:
@@ -7928,6 +7930,22 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
case Intrinsic::atan2:
case Intrinsic::canonicalize:
case Intrinsic::sqrt:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::exp10:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::modf:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::roundeven:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
return true;
default:
return false;