Diffstat (limited to 'llvm/lib/Analysis')
-rw-r--r--   llvm/lib/Analysis/ConstantFolding.cpp            | 179
-rw-r--r--   llvm/lib/Analysis/DXILResource.cpp               |   7
-rw-r--r--   llvm/lib/Analysis/DependenceAnalysis.cpp         |  17
-rw-r--r--   llvm/lib/Analysis/IVDescriptors.cpp              |  26
-rw-r--r--   llvm/lib/Analysis/InstructionSimplify.cpp        |   2
-rw-r--r--   llvm/lib/Analysis/LoopAccessAnalysis.cpp         |  23
-rw-r--r--   llvm/lib/Analysis/MemoryDependenceAnalysis.cpp   |  44
-rw-r--r--   llvm/lib/Analysis/MemoryProfileInfo.cpp          |  41
-rw-r--r--   llvm/lib/Analysis/ProfileSummaryInfo.cpp         |  14
-rw-r--r--   llvm/lib/Analysis/ScalarEvolution.cpp            |  21
-rw-r--r--   llvm/lib/Analysis/StackLifetime.cpp              |   5
-rw-r--r--   llvm/lib/Analysis/TargetLibraryInfo.cpp          |  28
-rw-r--r--   llvm/lib/Analysis/TargetTransformInfo.cpp        |   2
-rw-r--r--   llvm/lib/Analysis/ValueTracking.cpp              |  18
14 files changed, 351 insertions, 76 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9c1c2c6..759c553 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
     if (!AllConstantInt)
       break;
 
-    // TODO: Try to intersect two inrange attributes?
-    if (!InRange) {
-      InRange = GEP->getInRange();
-      if (InRange)
-        // Adjust inrange by offset until now.
-        InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+    // Adjust inrange offset and intersect inrange attributes
+    if (auto GEPRange = GEP->getInRange()) {
+      auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+      InRange =
+          InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
     }
 
     Ptr = cast<Constant>(GEP->getOperand(0));
@@ -1801,6 +1800,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nvvm_d2ull_rn:
   case Intrinsic::nvvm_d2ull_rp:
   case Intrinsic::nvvm_d2ull_rz:
+
+  // NVVM math intrinsics:
+  case Intrinsic::nvvm_ceil_d:
+  case Intrinsic::nvvm_ceil_f:
+  case Intrinsic::nvvm_ceil_ftz_f:
+
+  case Intrinsic::nvvm_fabs:
+  case Intrinsic::nvvm_fabs_ftz:
+
+  case Intrinsic::nvvm_floor_d:
+  case Intrinsic::nvvm_floor_f:
+  case Intrinsic::nvvm_floor_ftz_f:
+
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_f:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+
+  case Intrinsic::nvvm_round_d:
+  case Intrinsic::nvvm_round_f:
+  case Intrinsic::nvvm_round_ftz_f:
+
+  case Intrinsic::nvvm_saturate_d:
+  case Intrinsic::nvvm_saturate_f:
+  case Intrinsic::nvvm_saturate_ftz_f:
+
+  case Intrinsic::nvvm_sqrt_f:
+  case Intrinsic::nvvm_sqrt_rn_d:
+  case Intrinsic::nvvm_sqrt_rn_f:
+  case Intrinsic::nvvm_sqrt_rn_ftz_f:
     return !Call->isStrictFP();
 
   // Sign operations are actually bitwise operations, they do not raise
@@ -1818,6 +1855,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nearbyint:
   case Intrinsic::rint:
   case Intrinsic::canonicalize:
+
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
   case Intrinsic::experimental_constrained_fma:
@@ -1971,16 +2009,49 @@ static APFloat FTZPreserveSign(const APFloat &V) {
   return V;
 }
 
-Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
-                         Type *Ty) {
+static APFloat FlushToPositiveZero(const APFloat &V) {
+  if (V.isDenormal())
+    return APFloat::getZero(V.getSemantics(), false);
+  return V;
+}
+
+static APFloat FlushWithDenormKind(const APFloat &V,
+                                   DenormalMode::DenormalModeKind DenormKind) {
+  assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
+         DenormKind != DenormalMode::DenormalModeKind::Dynamic);
+  switch (DenormKind) {
+  case DenormalMode::DenormalModeKind::IEEE:
+    return V;
+  case DenormalMode::DenormalModeKind::PreserveSign:
+    return FTZPreserveSign(V);
+  case DenormalMode::DenormalModeKind::PositiveZero:
+    return FlushToPositiveZero(V);
+  default:
+    llvm_unreachable("Invalid denormal mode!");
+  }
+}
+
+Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
+                         DenormalMode DenormMode = DenormalMode::getIEEE()) {
+  if (!DenormMode.isValid() ||
+      DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
+      DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
+    return nullptr;
+
   llvm_fenv_clearexcept();
-  double Result = NativeFP(V.convertToDouble());
+  auto Input = FlushWithDenormKind(V, DenormMode.Input);
+  double Result = NativeFP(Input.convertToDouble());
   if (llvm_fenv_testexcept()) {
     llvm_fenv_clearexcept();
     return nullptr;
   }
 
-  return GetConstantFoldFPValue(Result, Ty);
+  Constant *Output = GetConstantFoldFPValue(Result, Ty);
+  if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
+    return Output;
+  const auto *CFP = static_cast<ConstantFP *>(Output);
+  const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
+  return ConstantFP::get(Ty->getContext(), Res);
 }
 
 #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
@@ -2550,6 +2621,94 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       return ConstantFoldFP(atan, APF, Ty);
     case Intrinsic::sqrt:
       return ConstantFoldFP(sqrt, APF, Ty);
+
+    // NVVM Intrinsics:
+    case Intrinsic::nvvm_ceil_ftz_f:
+    case Intrinsic::nvvm_ceil_f:
+    case Intrinsic::nvvm_ceil_d:
+      return ConstantFoldFP(
+          ceil, APF, Ty,
+          nvvm::GetNVVMDenromMode(
+              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+    case Intrinsic::nvvm_fabs_ftz:
+    case Intrinsic::nvvm_fabs:
+      return ConstantFoldFP(
+          fabs, APF, Ty,
+          nvvm::GetNVVMDenromMode(
+              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+    case Intrinsic::nvvm_floor_ftz_f:
+    case Intrinsic::nvvm_floor_f:
+    case Intrinsic::nvvm_floor_d:
+      return ConstantFoldFP(
+          floor, APF, Ty,
+          nvvm::GetNVVMDenromMode(
+              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+    case Intrinsic::nvvm_rcp_rm_ftz_f:
+    case Intrinsic::nvvm_rcp_rn_ftz_f:
+    case Intrinsic::nvvm_rcp_rp_ftz_f:
+    case Intrinsic::nvvm_rcp_rz_ftz_f:
+    case Intrinsic::nvvm_rcp_rm_d:
+    case Intrinsic::nvvm_rcp_rm_f:
+    case Intrinsic::nvvm_rcp_rn_d:
+    case Intrinsic::nvvm_rcp_rn_f:
+    case Intrinsic::nvvm_rcp_rp_d:
+    case Intrinsic::nvvm_rcp_rp_f:
+    case Intrinsic::nvvm_rcp_rz_d:
+    case Intrinsic::nvvm_rcp_rz_f: {
+      APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
+      bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
+
+      auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
+      APFloat Res = APFloat::getOne(APF.getSemantics());
+      APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
+
+      if (Status == APFloat::opOK || Status == APFloat::opInexact) {
+        if (IsFTZ)
+          Res = FTZPreserveSign(Res);
+        return ConstantFP::get(Ty->getContext(), Res);
+      }
+      return nullptr;
+    }
+
+    case Intrinsic::nvvm_round_ftz_f:
+    case Intrinsic::nvvm_round_f:
+    case Intrinsic::nvvm_round_d: {
+      // Use APFloat implementation instead of native libm call, as some
+      // implementations (e.g. on PPC) do not preserve the sign of negative 0.
+      bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+      auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+      V.roundToIntegral(APFloat::rmNearestTiesToAway);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
+
+    case Intrinsic::nvvm_saturate_ftz_f:
+    case Intrinsic::nvvm_saturate_d:
+    case Intrinsic::nvvm_saturate_f: {
+      bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+      auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+      if (V.isNegative() || V.isZero() || V.isNaN())
+        return ConstantFP::getZero(Ty);
+      APFloat One = APFloat::getOne(APF.getSemantics());
+      if (V > One)
+        return ConstantFP::get(Ty->getContext(), One);
+      return ConstantFP::get(Ty->getContext(), APF);
+    }
+
+    case Intrinsic::nvvm_sqrt_rn_ftz_f:
+    case Intrinsic::nvvm_sqrt_f:
+    case Intrinsic::nvvm_sqrt_rn_d:
+    case Intrinsic::nvvm_sqrt_rn_f:
+      if (APF.isNegative())
+        return nullptr;
+      return ConstantFoldFP(
+          sqrt, APF, Ty,
+          nvvm::GetNVVMDenromMode(
+              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+    // AMDGCN Intrinsics:
     case Intrinsic::amdgcn_cos:
     case Intrinsic::amdgcn_sin: {
       double V = getValueAsDouble(Op);
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 2da6468..1959ab6 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -1079,15 +1079,16 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
         // add new space
         S = &BS->Spaces.emplace_back(B.Space);
 
-      // the space is full - set flag to report overlapping binding later
-      if (S->FreeRanges.empty()) {
+      // The space is full - there are no free slots left, or the rest of the
+      // slots are taken by an unbounded array. Set flag to report overlapping
+      // binding later.
+      if (S->FreeRanges.empty() || S->FreeRanges.back().UpperBound < UINT32_MAX) {
        OverlappingBinding = true;
        continue;
       }
 
       // adjust the last free range lower bound, split it in two, or remove it
       BindingRange &LastFreeRange = S->FreeRanges.back();
-      assert(LastFreeRange.UpperBound == UINT32_MAX);
       if (LastFreeRange.LowerBound == B.LowerBound) {
         if (B.UpperBound < UINT32_MAX)
           LastFreeRange.LowerBound = B.UpperBound + 1;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dd9a44b..f1473b2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
                          SrcSubscripts, DstSubscripts))
     return false;
 
+  assert(isLoopInvariant(SrcBase, SrcLoop) &&
+         isLoopInvariant(DstBase, DstLoop) &&
+         "Expected SrcBase and DstBase to be loop invariant");
+
   int Size = SrcSubscripts.size();
   LLVM_DEBUG({
     dbgs() << "\nSrcSubscripts: ";
@@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
                                         SCEVUnionPredicate(Assume, *SE));
   }
 
+  // Even if the base pointers are the same, they may not be loop-invariant. It
+  // could lead to incorrect results, as we're analyzing loop-carried
+  // dependencies. Src and Dst can be in different loops, so we need to check
+  // the base pointer is invariant in both loops.
+  Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+  Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+  if (!isLoopInvariant(SrcBase, SrcLoop) ||
+      !isLoopInvariant(DstBase, DstLoop)) {
+    LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
+    return std::make_unique<Dependence>(Src, Dst,
+                                        SCEVUnionPredicate(Assume, *SE));
+  }
+
   uint64_t EltSize = SrcLoc.Size.toRaw();
   const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
   const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 39f74be..8be5de3 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
                     m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
            match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
   };
-  if (isIntMinMaxRecurrenceKind(Kind) ||
-      (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
+  if (isIntMinMaxRecurrenceKind(Kind))
     return isMinMaxPattern(I, Kind, Prev);
-  else if (isFMulAddIntrinsic(I))
+  if (isFPMinMaxRecurrenceKind(Kind)) {
+    InstDesc Res = isMinMaxPattern(I, Kind, Prev);
+    if (!Res.isRecurrence())
+      return InstDesc(false, I);
+    if (HasRequiredFMF())
+      return Res;
+    // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
+    // intrinsics with extra checks ensuring the vector loop handles only
+    // non-NaN inputs.
+    if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) {
+      assert(Kind == RecurKind::FMax &&
+             "unexpected recurrence kind for maxnum");
+      return InstDesc(I, RecurKind::FMaxNum);
+    }
+    if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) {
+      assert(Kind == RecurKind::FMin &&
+             "unexpected recurrence kind for minnum");
+      return InstDesc(I, RecurKind::FMinNum);
+    }
+    return InstDesc(false, I);
+  }
+  if (isFMulAddIntrinsic(I))
     return InstDesc(Kind == RecurKind::FMulAdd, I,
                     I->hasAllowReassoc() ? nullptr : I);
   return InstDesc(false, I);
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
     Type *MidTy = CI->getType();
     Type *DstTy = Ty;
     if (Src->getType() == Ty) {
-      auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+      auto FirstOp = CI->getOpcode();
       auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
       Type *SrcIntPtrTy =
           SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f3a32d3..14be385 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -589,11 +589,11 @@ void RuntimePointerChecking::groupChecks(
   // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
   // us to perform an accurate check in this case.
   //
-  // The above case requires that we have an UnknownDependence between
-  // accesses to the same underlying object. This cannot happen unless
-  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
-  // is also false. In this case we will use the fallback path and create
-  // separate checking groups for all pointers.
+  // In the above case, we have a non-constant distance and an Unknown
+  // dependence between accesses to the same underlying object, and could retry
+  // with runtime checks. Therefore UseDependencies is false. In this case we
+  // will use the fallback path and create separate checking groups for all
+  // pointers.
 
   // If we don't have the dependency partitions, construct a new
   // checking pointer group for each pointer. This is also required
@@ -819,7 +819,7 @@ public:
   /// perform dependency checking.
   ///
   /// Note that this can later be cleared if we retry memcheck analysis without
-  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
+  /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
   bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
 
   /// We decided that no dependence analysis would be used.  Reset the state.
@@ -896,7 +896,7 @@ private:
   ///
   /// Note that, this is different from isDependencyCheckNeeded.  When we retry
   /// memcheck analysis without dependency checking
-  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
+  /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
   /// cleared while this remains set if we have potentially dependent accesses.
   bool IsRTCheckAnalysisNeeded = false;
 
@@ -2079,11 +2079,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   if (StrideAScaled == StrideBScaled)
     CommonStride = StrideAScaled;
 
-  // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
-  // to consider retrying with runtime checks. Historically, we did not set it
-  // when (unscaled) strides were different but there is no inherent reason to.
+  // TODO: Historically, we didn't retry with runtime checks when (unscaled)
+  // strides were different but there is no inherent reason to.
   if (!isa<SCEVConstant>(Dist))
-    FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;
+    ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
 
   // If distance is a SCEVCouldNotCompute, return Unknown immediately.
   if (isa<SCEVCouldNotCompute>(Dist)) {
@@ -2712,7 +2711,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
     DepsAreSafe =
         DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
 
-    if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
+    if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
 
       // Clear the dependency checks. We assume they are not needed.
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3aa9909..2b0f212 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,33 +983,37 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
 static void
 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                          unsigned NumSortedEntries) {
-  switch (Cache.size() - NumSortedEntries) {
-  case 0:
-    // done, no new entries.
-    break;
-  case 2: {
-    // Two new entries, insert the last one into place.
-    NonLocalDepEntry Val = Cache.back();
-    Cache.pop_back();
-    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
-    Cache.insert(Entry, Val);
-    [[fallthrough]];
+
+  // If only one entry, don't sort.
+  if (Cache.size() < 2)
+    return;
+
+  unsigned s = Cache.size() - NumSortedEntries;
+
+  // If the cache is already sorted, don't sort it again.
+  if (s == 0)
+    return;
+
+  // If no entry is sorted, sort the whole cache.
+  if (NumSortedEntries == 0) {
+    llvm::sort(Cache);
+    return;
   }
-  case 1:
-    // One new entry, Just insert the new value at the appropriate position.
-    if (Cache.size() != 1) {
+
+  // If the number of unsorted entries is small and the cache size is big, using
+  // insertion sort is faster. Here use Log2_32 to quickly choose the sort
+  // method.
+  if (s < Log2_32(Cache.size())) {
+    while (s > 0) {
       NonLocalDepEntry Val = Cache.back();
       Cache.pop_back();
       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-          llvm::upper_bound(Cache, Val);
+          std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
       Cache.insert(Entry, Val);
+      s--;
     }
-    break;
-  default:
-    // Added many values, do a full scale sort.
+  } else {
     llvm::sort(Cache);
-    break;
   }
 }
 
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index c08024a..b3c8a7d 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -157,6 +157,8 @@ void CallStackTrie::addCallStack(
 }
 
 void CallStackTrie::addCallStack(MDNode *MIB) {
+  // Note that we are building this from existing MD_memprof metadata.
+  BuiltFromExistingMetadata = true;
   MDNode *StackMD = getMIBStackNode(MIB);
   assert(StackMD);
   std::vector<uint64_t> CallStack;
@@ -187,8 +189,9 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
 static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
                              AllocationType AllocType,
                              ArrayRef<ContextTotalSize> ContextSizeInfo,
-                             const uint64_t MaxColdSize, uint64_t &TotalBytes,
-                             uint64_t &ColdBytes) {
+                             const uint64_t MaxColdSize,
+                             bool BuiltFromExistingMetadata,
+                             uint64_t &TotalBytes, uint64_t &ColdBytes) {
   SmallVector<Metadata *> MIBPayload(
       {buildCallstackMetadata(MIBCallStack, Ctx)});
   MIBPayload.push_back(
@@ -197,8 +200,9 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
   if (ContextSizeInfo.empty()) {
     // The profile matcher should have provided context size info if there was a
    // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
-    // handle a user-provided percent larger than 100.
-    assert(MinCallsiteColdBytePercent >= 100);
+    // handle a user-provided percent larger than 100. However, we may not have
+    // this information if we built the Trie from existing MD_memprof metadata.
+    assert(BuiltFromExistingMetadata || MinCallsiteColdBytePercent >= 100);
     return MDNode::get(Ctx, MIBPayload);
   }
 
@@ -252,9 +256,19 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
 static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
                                     std::vector<Metadata *> &SavedMIBNodes,
                                     unsigned CallerContextLength,
-                                    uint64_t TotalBytes, uint64_t ColdBytes) {
+                                    uint64_t TotalBytes, uint64_t ColdBytes,
+                                    bool BuiltFromExistingMetadata) {
   const bool MostlyCold =
-      MinCallsiteColdBytePercent < 100 &&
+      // If we have built the Trie from existing MD_memprof metadata, we may or
+      // may not have context size information (in which case ColdBytes and
+      // TotalBytes are 0, which is also not guarded against below). Even if we
+      // do have some context size information from the metadata, we have
+      // already gone through a round of discarding of small non-cold contexts
+      // during matching, and it would be overly aggressive to do it again, and
+      // we also want to maintain the same behavior with and without reporting
+      // of hinted bytes enabled.
+      !BuiltFromExistingMetadata && MinCallsiteColdBytePercent < 100 &&
+      ColdBytes > 0 &&
      ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;
 
   // In the simplest case, with pruning disabled, keep all the new MIB nodes.
@@ -386,9 +400,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   if (hasSingleAllocType(Node->AllocTypes)) {
     std::vector<ContextTotalSize> ContextSizeInfo;
     collectContextSizeInfo(Node, ContextSizeInfo);
-    MIBNodes.push_back(
-        createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes,
-                      ContextSizeInfo, MaxColdSize, TotalBytes, ColdBytes));
+    MIBNodes.push_back(createMIBNode(
+        Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo,
+        MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
     return true;
   }
 
@@ -416,7 +430,8 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
     // Pass in the stack length of the MIB nodes added for the immediate caller,
     // which is the current stack length plus 1.
     saveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1,
-                            CallerTotalBytes, CallerColdBytes);
+                            CallerTotalBytes, CallerColdBytes,
+                            BuiltFromExistingMetadata);
     TotalBytes += CallerTotalBytes;
     ColdBytes += CallerColdBytes;
 
@@ -441,9 +456,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
     return false;
   std::vector<ContextTotalSize> ContextSizeInfo;
   collectContextSizeInfo(Node, ContextSizeInfo);
-  MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
-                                   ContextSizeInfo, MaxColdSize, TotalBytes,
-                                   ColdBytes));
+  MIBNodes.push_back(createMIBNode(
+      Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize,
+      BuiltFromExistingMetadata, TotalBytes, ColdBytes));
   return true;
 }
 
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37..f1c3155 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
       ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
   ColdCountThreshold =
       ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
-  assert(ColdCountThreshold <= HotCountThreshold &&
-         "Cold count threshold cannot exceed hot count threshold!");
+  // When the hot and cold thresholds are identical, we would classify
+  // a count value as both hot and cold since we are doing an inclusive check
+  // (see ::is{Hot|Cold}Count()). To avoid this undesirable overlap, ensure the
+  // thresholds are distinct.
+  if (HotCountThreshold == ColdCountThreshold) {
+    if (ColdCountThreshold > 0)
+      (*ColdCountThreshold)--;
+    else
+      (*HotCountThreshold)++;
+  }
+  assert(ColdCountThreshold < HotCountThreshold &&
+         "Cold count threshold should be less than hot count threshold!");
   if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
     HasHugeWorkingSetSize =
         HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 24adfa3..0990a0d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -11418,8 +11418,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
       XNonConstOp = X;
       XFlagsPresent = ExpectedFlags;
     }
-    if (!isa<SCEVConstant>(XConstOp) ||
-        (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
+    if (!isa<SCEVConstant>(XConstOp))
      return false;
 
    if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
@@ -11428,12 +11427,20 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
       YFlagsPresent = ExpectedFlags;
     }
 
-    if (!isa<SCEVConstant>(YConstOp) ||
-        (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+    if (YNonConstOp != XNonConstOp)
       return false;
 
-    if (YNonConstOp != XNonConstOp)
+    if (!isa<SCEVConstant>(YConstOp))
+      return false;
+
+    // When matching ADDs with NUW flags (and unsigned predicates), only the
+    // second ADD (with the larger constant) requires NUW.
+    if ((YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+      return false;
+    if (ExpectedFlags != SCEV::FlagNUW &&
+        (XFlagsPresent & ExpectedFlags) != ExpectedFlags) {
       return false;
+    }
 
     OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
     OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
@@ -11472,7 +11479,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
       std::swap(LHS, RHS);
     [[fallthrough]];
   case ICmpInst::ICMP_ULE:
-    // (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
+    // (X + C1) u<= (X + C2)<nuw> for C1 u<= C2.
     if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ule(C2))
       return true;
 
@@ -11482,7 +11489,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
       std::swap(LHS, RHS);
     [[fallthrough]];
   case ICmpInst::ICMP_ULT:
-    // (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
+    // (X + C1) u< (X + C2)<nuw> if C1 u< C2.
     if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ult(C2))
       return true;
     break;
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 21f54c7..34a7a04 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -63,10 +63,7 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI,
 // markers has the same size and points to the alloca start.
 static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
                                             const DataLayout &DL) {
-  const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
-  if (!AI)
-    return nullptr;
-
+  const AllocaInst *AI = cast<AllocaInst>(II.getArgOperand(1));
   auto AllocaSize = AI->getAllocationSize(DL);
   if (!AllocaSize)
     return nullptr;
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e475be2..6e92766 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -875,6 +875,34 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_toascii);
   }
 
+  if (T.isOSFreeBSD()) {
+    TLI.setAvailable(LibFunc_dunder_strtok_r);
+    TLI.setAvailable(LibFunc_memalign);
+    TLI.setAvailable(LibFunc_fputc_unlocked);
+    TLI.setAvailable(LibFunc_fputs_unlocked);
+    TLI.setAvailable(LibFunc_fread_unlocked);
+    TLI.setAvailable(LibFunc_fwrite_unlocked);
+    TLI.setAvailable(LibFunc_getc_unlocked);
+    TLI.setAvailable(LibFunc_getchar_unlocked);
+    TLI.setAvailable(LibFunc_putc_unlocked);
+    TLI.setAvailable(LibFunc_putchar_unlocked);
+
+    TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+    TLI.setUnavailable(LibFunc___kmpc_free_shared);
+    TLI.setUnavailable(LibFunc_dunder_strndup);
+    TLI.setUnavailable(LibFunc_memccpy_chk);
+    TLI.setUnavailable(LibFunc_strlen_chk);
+    TLI.setUnavailable(LibFunc_fmaximum_num);
+    TLI.setUnavailable(LibFunc_fmaximum_numf);
+    TLI.setUnavailable(LibFunc_fmaximum_numl);
+    TLI.setUnavailable(LibFunc_fminimum_num);
+    TLI.setUnavailable(LibFunc_fminimum_numf);
+    TLI.setUnavailable(LibFunc_fminimum_numl);
+    TLI.setUnavailable(LibFunc_roundeven);
+    TLI.setUnavailable(LibFunc_roundevenf);
+    TLI.setUnavailable(LibFunc_roundevenl);
+  }
+
   // As currently implemented in clang, NVPTX code has no standard library to
   // speak of.  Headers provide a standard-ish library implementation, but many
   // of the signatures are wrong -- for example, many libm functions are not
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8a470eb..55ba52a 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1423,7 +1423,7 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
   return TTIImpl->hasArmWideBranch(Thumb);
 }
 
-uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const {
+APInt TargetTransformInfo::getFeatureMask(const Function &F) const {
   return TTIImpl->getFeatureMask(F);
 }
 
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 61a322b..af85ce4 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7912,6 +7912,8 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
   case Intrinsic::ushl_sat:
   case Intrinsic::smul_fix:
   case Intrinsic::smul_fix_sat:
+  case Intrinsic::umul_fix:
+  case Intrinsic::umul_fix_sat:
   case Intrinsic::pow:
   case Intrinsic::powi:
   case Intrinsic::sin:
@@ -7928,6 +7930,22 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
   case Intrinsic::atan2:
   case Intrinsic::canonicalize:
   case Intrinsic::sqrt:
+  case Intrinsic::exp:
+  case Intrinsic::exp2:
+  case Intrinsic::exp10:
+  case Intrinsic::log:
+  case Intrinsic::log2:
+  case Intrinsic::log10:
+  case Intrinsic::modf:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+  case Intrinsic::round:
+  case Intrinsic::roundeven:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint:
     return true;
   default:
     return false;
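
Note on the ConstantFolding.cpp change above: the new ConstantFoldFP overload flushes denormal inputs and outputs according to a DenormalMode before and after calling the host libm function. The standalone sketch below is an illustration only (it is not the in-tree code): it mirrors the two flushing behaviors (PreserveSign and PositiveZero) using llvm::APFloat. The helper names, the file name, and the build command are assumptions made for this example, not part of the patch.

// ftz_sketch.cpp -- illustrative sketch only; assumes LLVM's APFloat API.
// Assumed build line:
//   clang++ ftz_sketch.cpp $(llvm-config --cxxflags --ldflags --libs support)
#include "llvm/ADT/APFloat.h"
#include <cstdio>

using llvm::APFloat;

// PreserveSign: a denormal collapses to zero but keeps its sign.
static APFloat ftzPreserveSign(const APFloat &V) {
  if (V.isDenormal())
    return APFloat::getZero(V.getSemantics(), V.isNegative());
  return V;
}

// PositiveZero: a denormal collapses to +0.0 regardless of its sign.
static APFloat flushToPositiveZero(const APFloat &V) {
  if (V.isDenormal())
    return APFloat::getZero(V.getSemantics(), /*Negative=*/false);
  return V;
}

int main() {
  // Smallest negative single-precision subnormal value.
  APFloat D = APFloat::getSmallest(APFloat::IEEEsingle(), /*Negative=*/true);
  std::printf("preserve-sign: %g\n", ftzPreserveSign(D).convertToFloat());
  std::printf("positive-zero: %g\n", flushToPositiveZero(D).convertToFloat());
  return 0;
}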