Diffstat (limited to 'llvm/lib/Analysis')
-rw-r--r-- | llvm/lib/Analysis/CMakeLists.txt | 1
-rw-r--r-- | llvm/lib/Analysis/ConstantFolding.cpp | 40
-rw-r--r-- | llvm/lib/Analysis/DXILResource.cpp | 135
-rw-r--r-- | llvm/lib/Analysis/DependenceAnalysis.cpp | 17
-rw-r--r-- | llvm/lib/Analysis/InstructionSimplify.cpp | 2
-rw-r--r-- | llvm/lib/Analysis/Loads.cpp | 2
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 76
-rw-r--r-- | llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 44
-rw-r--r-- | llvm/lib/Analysis/ProfileSummaryInfo.cpp | 14
-rw-r--r-- | llvm/lib/Analysis/ScalarEvolution.cpp | 35
-rw-r--r-- | llvm/lib/Analysis/StackLifetime.cpp | 5
-rw-r--r-- | llvm/lib/Analysis/TargetLibraryInfo.cpp | 28
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 6
-rw-r--r-- | llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 4
-rw-r--r-- | llvm/lib/Analysis/UniformityAnalysis.cpp | 1
-rw-r--r-- | llvm/lib/Analysis/VectorUtils.cpp | 30
16 files changed, 202 insertions, 238 deletions
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index cfde787..16dd6f8 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -175,6 +175,7 @@ add_llvm_component_library(LLVMAnalysis
   LINK_COMPONENTS
   BinaryFormat
   Core
+  FrontendHLSL
   Object
   ProfileData
   Support
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index e71ba5e..dd98b62 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
     if (!AllConstantInt)
       break;

-    // TODO: Try to intersect two inrange attributes?
-    if (!InRange) {
-      InRange = GEP->getInRange();
-      if (InRange)
-        // Adjust inrange by offset until now.
-        InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+    // Adjust inrange offset and intersect inrange attributes
+    if (auto GEPRange = GEP->getInRange()) {
+      auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+      InRange =
+          InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
     }

     Ptr = cast<Constant>(GEP->getOperand(0));
@@ -1374,7 +1373,7 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
     return flushDenormalConstantFP(CFP, Inst, IsOutput);

-  if (isa<ConstantAggregateZero, UndefValue, ConstantExpr>(Operand))
+  if (isa<ConstantAggregateZero, UndefValue>(Operand))
     return Operand;

   Type *Ty = Operand->getType();
@@ -1390,6 +1389,9 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
     Ty = VecTy->getElementType();
   }

+  if (isa<ConstantExpr>(Operand))
+    return Operand;
+
   if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
     SmallVector<Constant *, 16> NewElts;
     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
@@ -2004,21 +2006,20 @@ inline bool llvm_fenv_testexcept() {
   return false;
 }

-static const APFloat FTZPreserveSign(const APFloat &V) {
+static APFloat FTZPreserveSign(const APFloat &V) {
   if (V.isDenormal())
     return APFloat::getZero(V.getSemantics(), V.isNegative());
   return V;
 }

-static const APFloat FlushToPositiveZero(const APFloat &V) {
+static APFloat FlushToPositiveZero(const APFloat &V) {
   if (V.isDenormal())
     return APFloat::getZero(V.getSemantics(), false);
   return V;
 }

-static const APFloat
-FlushWithDenormKind(const APFloat &V,
-                    DenormalMode::DenormalModeKind DenormKind) {
+static APFloat FlushWithDenormKind(const APFloat &V,
+                                   DenormalMode::DenormalModeKind DenormKind) {
   assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
          DenormKind != DenormalMode::DenormalModeKind::Dynamic);
   switch (DenormKind) {
@@ -2630,14 +2631,14 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
   case Intrinsic::nvvm_ceil_d:
     return ConstantFoldFP(
         ceil, APF, Ty,
-        nvvm::GetNVVMDenromMode(
+        nvvm::GetNVVMDenormMode(
            nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));

   case Intrinsic::nvvm_fabs_ftz:
   case Intrinsic::nvvm_fabs:
     return ConstantFoldFP(
         fabs, APF, Ty,
-        nvvm::GetNVVMDenromMode(
+        nvvm::GetNVVMDenormMode(
            nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));

   case Intrinsic::nvvm_floor_ftz_f:
@@ -2645,7 +2646,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
   case Intrinsic::nvvm_floor_d:
     return ConstantFoldFP(
         floor, APF, Ty,
-        nvvm::GetNVVMDenromMode(
+        nvvm::GetNVVMDenormMode(
            nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));

   case Intrinsic::nvvm_rcp_rm_ftz_f:
@@ -2678,11 +2679,12 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
   case Intrinsic::nvvm_round_ftz_f:
   case Intrinsic::nvvm_round_f:
   case Intrinsic::nvvm_round_d: {
-    // Use APFloat implementation instead of native libm call, as some
-    // implementations (e.g. on PPC) do not preserve the sign of negative 0.
+    // nvvm_round is lowered to PTX cvt.rni, which will round to nearest
+    // integer, choosing even integer if source is equidistant between two
+    // integers, so the semantics are closer to "rint" rather than "round".
     bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
     auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
-    V.roundToIntegral(APFloat::rmNearestTiesToAway);
+    V.roundToIntegral(APFloat::rmNearestTiesToEven);
     return ConstantFP::get(Ty->getContext(), V);
   }

@@ -2707,7 +2709,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       return nullptr;
     return ConstantFoldFP(
         sqrt, APF, Ty,
-        nvvm::GetNVVMDenromMode(
+        nvvm::GetNVVMDenormMode(
            nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));

   // AMDGCN Intrinsics:
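The nvvm_round folding above switches from rmNearestTiesToAway to rmNearestTiesToEven so that constant-folded results match PTX cvt.rni. A minimal standalone sketch (not part of the patch; an assumed harness built against LLVM's ADT headers) showing where the two rounding modes diverge:

```c++
// Illustrative only: the two modes disagree exactly on halfway cases,
// which is the case nvvm_round folding now treats like rint.
#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::APFloat TiesToEven(2.5), TiesToAway(2.5);
  (void)TiesToEven.roundToIntegral(llvm::APFloat::rmNearestTiesToEven); // -> 2.0, like PTX cvt.rni
  (void)TiesToAway.roundToIntegral(llvm::APFloat::rmNearestTiesToAway); // -> 3.0, like libm round()
  llvm::errs() << TiesToEven.convertToDouble() << " vs "
               << TiesToAway.convertToDouble() << "\n";
  return 0;
}
```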
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 2da6468..629fa7cd 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -995,18 +995,7 @@ SmallVector<dxil::ResourceInfo *> DXILResourceMap::findByUse(const Value *Key) {
 //===----------------------------------------------------------------------===//

 void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
-  struct Binding {
-    ResourceClass RC;
-    uint32_t Space;
-    uint32_t LowerBound;
-    uint32_t UpperBound;
-    Value *Name;
-    Binding(ResourceClass RC, uint32_t Space, uint32_t LowerBound,
-            uint32_t UpperBound, Value *Name)
-        : RC(RC), Space(Space), LowerBound(LowerBound), UpperBound(UpperBound),
-          Name(Name) {}
-  };
-  SmallVector<Binding> Bindings;
+  hlsl::BindingInfoBuilder Builder;

   // collect all of the llvm.dx.resource.handlefrombinding calls;
   // make a note if there is llvm.dx.resource.handlefromimplicitbinding
@@ -1036,132 +1025,20 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
         assert((Size < 0 || (unsigned)LowerBound + Size - 1 <= UINT32_MAX) &&
                "upper bound register overflow");
         uint32_t UpperBound = Size < 0 ? UINT32_MAX : LowerBound + Size - 1;
-        Bindings.emplace_back(RTI.getResourceClass(), Space, LowerBound,
-                              UpperBound, Name);
+        Builder.trackBinding(RTI.getResourceClass(), Space, LowerBound,
+                             UpperBound, Name);
       }
       break;
     }
     case Intrinsic::dx_resource_handlefromimplicitbinding: {
-      ImplicitBinding = true;
+      HasImplicitBinding = true;
       break;
     }
     }
   }

-  // sort all the collected bindings
-  llvm::stable_sort(Bindings, [](auto &LHS, auto &RHS) {
-    return std::tie(LHS.RC, LHS.Space, LHS.LowerBound) <
-           std::tie(RHS.RC, RHS.Space, RHS.LowerBound);
-  });
-
-  // remove duplicates
-  Binding *NewEnd = llvm::unique(Bindings, [](auto &LHS, auto &RHS) {
-    return std::tie(LHS.RC, LHS.Space, LHS.LowerBound, LHS.UpperBound,
-                    LHS.Name) == std::tie(RHS.RC, RHS.Space, RHS.LowerBound,
-                                          RHS.UpperBound, RHS.Name);
-  });
-  if (NewEnd != Bindings.end())
-    Bindings.erase(NewEnd);
-
-  // Go over the sorted bindings and build up lists of free register ranges
-  // for each binding type and used spaces. Bindings are sorted by resource
-  // class, space, and lower bound register slot.
-  BindingSpaces *BS = &SRVSpaces;
-  for (const Binding &B : Bindings) {
-    if (BS->RC != B.RC)
-      // move to the next resource class spaces
-      BS = &getBindingSpaces(B.RC);
-
-    RegisterSpace *S = BS->Spaces.empty() ? &BS->Spaces.emplace_back(B.Space)
-                                          : &BS->Spaces.back();
-    assert(S->Space <= B.Space && "bindings not sorted correctly?");
-    if (B.Space != S->Space)
-      // add new space
-      S = &BS->Spaces.emplace_back(B.Space);
-
-    // the space is full - set flag to report overlapping binding later
-    if (S->FreeRanges.empty()) {
-      OverlappingBinding = true;
-      continue;
-    }
-
-    // adjust the last free range lower bound, split it in two, or remove it
-    BindingRange &LastFreeRange = S->FreeRanges.back();
-    assert(LastFreeRange.UpperBound == UINT32_MAX);
-    if (LastFreeRange.LowerBound == B.LowerBound) {
-      if (B.UpperBound < UINT32_MAX)
-        LastFreeRange.LowerBound = B.UpperBound + 1;
-      else
-        S->FreeRanges.pop_back();
-    } else if (LastFreeRange.LowerBound < B.LowerBound) {
-      LastFreeRange.UpperBound = B.LowerBound - 1;
-      if (B.UpperBound < UINT32_MAX)
-        S->FreeRanges.emplace_back(B.UpperBound + 1, UINT32_MAX);
-    } else {
-      OverlappingBinding = true;
-      if (B.UpperBound < UINT32_MAX)
-        LastFreeRange.LowerBound =
-            std::max(LastFreeRange.LowerBound, B.UpperBound + 1);
-      else
-        S->FreeRanges.pop_back();
-    }
-  }
-}
-
-// returns std::nulopt if binding could not be found in given space
-std::optional<uint32_t>
-DXILResourceBindingInfo::findAvailableBinding(dxil::ResourceClass RC,
-                                              uint32_t Space, int32_t Size) {
-  BindingSpaces &BS = getBindingSpaces(RC);
-  RegisterSpace &RS = BS.getOrInsertSpace(Space);
-  return RS.findAvailableBinding(Size);
-}
-
-DXILResourceBindingInfo::RegisterSpace &
-DXILResourceBindingInfo::BindingSpaces::getOrInsertSpace(uint32_t Space) {
-  for (auto *I = Spaces.begin(); I != Spaces.end(); ++I) {
-    if (I->Space == Space)
-      return *I;
-    if (I->Space < Space)
-      continue;
-    return *Spaces.insert(I, Space);
-  }
-  return Spaces.emplace_back(Space);
-}
-
-std::optional<uint32_t>
-DXILResourceBindingInfo::RegisterSpace::findAvailableBinding(int32_t Size) {
-  assert((Size == -1 || Size > 0) && "invalid size");
-
-  if (FreeRanges.empty())
-    return std::nullopt;
-
-  // unbounded array
-  if (Size == -1) {
-    BindingRange &Last = FreeRanges.back();
-    if (Last.UpperBound != UINT32_MAX)
-      // this space is already occupied by an unbounded array
-      return std::nullopt;
-    uint32_t RegSlot = Last.LowerBound;
-    FreeRanges.pop_back();
-    return RegSlot;
-  }
-
-  // single resource or fixed-size array
-  for (BindingRange &R : FreeRanges) {
-    // compare the size as uint64_t to prevent overflow for range (0,
-    // UINT32_MAX)
-    if ((uint64_t)R.UpperBound - R.LowerBound + 1 < (uint64_t)Size)
-      continue;
-    uint32_t RegSlot = R.LowerBound;
-    // This might create a range where (LowerBound == UpperBound + 1). When
-    // that happens, the next time this function is called the range will be
-    // skipped over by the check above (at this point Size is always > 0).
-    R.LowerBound += Size;
-    return RegSlot;
-  }
-
-  return std::nullopt;
+  Bindings = Builder.calculateBindingInfo(
+      [this](auto, auto) { this->HasOverlappingBinding = true; });
 }

 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dd9a44b..f1473b2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
                           SrcSubscripts, DstSubscripts))
     return false;

+  assert(isLoopInvariant(SrcBase, SrcLoop) &&
+         isLoopInvariant(DstBase, DstLoop) &&
+         "Expected SrcBase and DstBase to be loop invariant");
+
   int Size = SrcSubscripts.size();
   LLVM_DEBUG({
     dbgs() << "\nSrcSubscripts: ";
@@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
                                         SCEVUnionPredicate(Assume, *SE));
   }

+  // Even if the base pointers are the same, they may not be loop-invariant. It
+  // could lead to incorrect results, as we're analyzing loop-carried
+  // dependencies. Src and Dst can be in different loops, so we need to check
+  // the base pointer is invariant in both loops.
+  Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+  Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+  if (!isLoopInvariant(SrcBase, SrcLoop) ||
+      !isLoopInvariant(DstBase, DstLoop)) {
+    LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
+    return std::make_unique<Dependence>(Src, Dst,
+                                        SCEVUnionPredicate(Assume, *SE));
+  }
+
   uint64_t EltSize = SrcLoc.Size.toRaw();
   const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
   const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
     Type *MidTy = CI->getType();
     Type *DstTy = Ty;
     if (Src->getType() == Ty) {
-      auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+      auto FirstOp = CI->getOpcode();
       auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
       Type *SrcIntPtrTy =
           SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 393f264..6fc81d787 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -342,7 +342,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(
                  : SE.getConstantMaxBackedgeTakenCount(L);
   }
   const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
-      L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
+      L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
   if (isa<SCEVCouldNotCompute>(AccessStart) ||
       isa<SCEVCouldNotCompute>(AccessEnd))
     return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 14be385..a553533 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -23,6 +23,8 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
 /// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
 /// \p MaxBTC is guaranteed inbounds of the accessed object.
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
-                                                 const SCEV *MaxBTC,
-                                                 const SCEV *EltSize,
-                                                 ScalarEvolution &SE,
-                                                 const DataLayout &DL) {
+static bool
+evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+                                     const SCEV *MaxBTC, const SCEV *EltSize,
+                                     ScalarEvolution &SE, const DataLayout &DL,
+                                     DominatorTree *DT, AssumptionCache *AC) {
   auto *PointerBase = SE.getPointerBase(AR->getStart());
   auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
   if (!StartPtr)
     return false;
+
+  const Loop *L = AR->getLoop();
   bool CheckForNonNull, CheckForFreed;
-  uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+  Value *StartPtrV = StartPtr->getValue();
+  uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
       DL, CheckForNonNull, CheckForFreed);

-  if (CheckForNonNull || CheckForFreed)
+  if (DerefBytes && (CheckForNonNull || CheckForFreed))
     return false;

   const SCEV *Step = AR->getStepRecurrence(SE);
+  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+  const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
+
+  // Check if we have a suitable dereferencable assumption we can use.
+  if (!StartPtrV->canBeFreed()) {
+    RetainedKnowledge DerefRK = getKnowledgeValidInContext(
+        StartPtrV, {Attribute::Dereferenceable}, *AC,
+        L->getLoopPredecessor()->getTerminator(), DT);
+    if (DerefRK) {
+      DerefBytesSCEV = SE.getUMaxExpr(
+          DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
+    }
+  }
+
+  if (DerefBytesSCEV->isZero())
+    return false;
+
   bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
   if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
     return false;

-  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
   Step = SE.getNoopOrSignExtend(Step, WiderTy);
   MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
@@ -256,8 +276,7 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
     const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
     if (!EndBytes)
       return false;
-    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
-                               SE.getConstant(WiderTy, DerefBytes));
+    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
   }

   // For negative steps check if
@@ -265,15 +284,15 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
   //  * StartOffset <= DerefBytes.
   assert(SE.isKnownNegative(Step) && "must be known negative");
   return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
-         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
-                             SE.getConstant(WiderTy, DerefBytes));
+         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
 }

 std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
     const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
-             std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+             std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+    DominatorTree *DT, AssumptionCache *AC) {
   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
   if (PointerBounds) {
     auto [Iter, Ins] = PointerBounds->insert(
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     // sets ScEnd to the maximum unsigned value for the type. Note that LAA
     // separately checks that accesses cannot not wrap, so unsigned max
     // represents an upper bound.
-    if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
-                                             DL)) {
+    if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
+                                             DT, AC)) {
       ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
     } else {
       ScEnd = SE->getAddExpr(
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                     bool NeedsFreeze) {
   const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
   const SCEV *BTC = PSE.getBackedgeTakenCount();
-  const auto &[ScStart, ScEnd] =
-      getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
-                              PSE.getSE(), &DC.getPointerBounds());
+  const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
+      Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+      &DC.getPointerBounds(), DC.getDT(), DC.getAC());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
   const SCEV *BTC = PSE.getBackedgeTakenCount();
   const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
   ScalarEvolution &SE = *PSE.getSE();
-  const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
-      InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+  const auto &[SrcStart_, SrcEnd_] =
+      getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
+                              &SE, &PointerBounds, DT, AC);
   if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
     return false;

-  const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
-      InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+  const auto &[SinkStart_, SinkEnd_] =
+      getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
+                              &SE, &PointerBounds, DT, AC);
   if (isa<SCEVCouldNotCompute>(SinkStart_) ||
       isa<SCEVCouldNotCompute>(SinkEnd_))
     return false;
@@ -3002,7 +3023,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const TargetTransformInfo *TTI,
                                const TargetLibraryInfo *TLI, AAResults *AA,
                                DominatorTree *DT, LoopInfo *LI,
-                               bool AllowPartial)
+                               AssumptionCache *AC, bool AllowPartial)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
       PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
   unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
@@ -3012,8 +3033,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
     MaxTargetVectorWidthInBits =
         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;

-  DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
-                                                  MaxTargetVectorWidthInBits);
+  DepChecker = std::make_unique<MemoryDepChecker>(
+      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
   PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
   if (canAnalyzeLoop())
     CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3082,7 +3103,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
   // or if it was created with a different value of AllowPartial.
   if (Inserted || It->second->hasAllowPartial() != AllowPartial)
     It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
-                                                  &LI, AllowPartial);
+                                                  &LI, AC, AllowPartial);

   return *It->second;
 }
@@ -3125,7 +3146,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
   auto &LI = FAM.getResult<LoopAnalysis>(F);
   auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
-  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
+  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
+  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
 }

 AnalysisKey LoopAccessAnalysis::Key;
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3aa9909..2b0f212 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,33 +983,37 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
 static void
 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                          unsigned NumSortedEntries) {
-  switch (Cache.size() - NumSortedEntries) {
-  case 0:
-    // done, no new entries.
-    break;
-  case 2: {
-    // Two new entries, insert the last one into place.
-    NonLocalDepEntry Val = Cache.back();
-    Cache.pop_back();
-    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
-    Cache.insert(Entry, Val);
-    [[fallthrough]];
+
+  // If only one entry, don't sort.
+  if (Cache.size() < 2)
+    return;
+
+  unsigned s = Cache.size() - NumSortedEntries;
+
+  // If the cache is already sorted, don't sort it again.
+  if (s == 0)
+    return;
+
+  // If no entry is sorted, sort the whole cache.
+  if (NumSortedEntries == 0) {
+    llvm::sort(Cache);
+    return;
   }
-  case 1:
-    // One new entry, Just insert the new value at the appropriate position.
-    if (Cache.size() != 1) {
+
+  // If the number of unsorted entires is small and the cache size is big, using
+  // insertion sort is faster. Here use Log2_32 to quickly choose the sort
+  // method.
+  if (s < Log2_32(Cache.size())) {
+    while (s > 0) {
       NonLocalDepEntry Val = Cache.back();
       Cache.pop_back();
       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-          llvm::upper_bound(Cache, Val);
+          std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
       Cache.insert(Entry, Val);
+      s--;
     }
-    break;
-  default:
-    // Added many values, do a full scale sort.
+  } else {
     llvm::sort(Cache);
-    break;
   }
 }
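The SortNonLocalDepInfoCache rewrite above replaces the old 0/1/2/default switch with a size-based heuristic: a handful of trailing unsorted entries are inserted one at a time into the sorted prefix, and anything larger falls back to a full sort. A hedged sketch of the same idea on a plain std::vector<int> (illustrative only; the real cache holds NonLocalDepEntry objects):

```c++
// Simplified re-statement of the heuristic, assuming an LLVM build for Log2_32.
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cstdint>
#include <vector>

static void resortCache(std::vector<int> &Cache, unsigned NumSortedEntries) {
  if (Cache.size() < 2)
    return;
  unsigned Unsorted = Cache.size() - NumSortedEntries;
  if (Unsorted == 0)
    return;
  if (NumSortedEntries == 0 ||
      Unsorted >= llvm::Log2_32(static_cast<uint32_t>(Cache.size()))) {
    std::sort(Cache.begin(), Cache.end()); // many new entries: full sort
    return;
  }
  while (Unsorted > 0) {
    int Val = Cache.back();
    Cache.pop_back();
    // After the pop, the sorted prefix ends Unsorted - 1 elements before end().
    auto Entry =
        std::upper_bound(Cache.begin(), Cache.end() - Unsorted + 1, Val);
    Cache.insert(Entry, Val);
    --Unsorted;
  }
}
```

The Log2_32 cutoff is a cheap approximation of the point where k binary-search insertions (each shifting the tail) stop being cheaper than one full n log n sort.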
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37..f1c3155 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
       ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
   ColdCountThreshold =
       ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
-  assert(ColdCountThreshold <= HotCountThreshold &&
-         "Cold count threshold cannot exceed hot count threshold!");
+  // When the hot and cold thresholds are identical, we would classify
+  // a count value as both hot and cold since we are doing an inclusive check
+  // (see ::is{Hot|Cold}Count(). To avoid this undesirable overlap, ensure the
+  // thresholds are distinct.
+  if (HotCountThreshold == ColdCountThreshold) {
+    if (ColdCountThreshold > 0)
+      (*ColdCountThreshold)--;
+    else
+      (*HotCountThreshold)++;
+  }
+  assert(ColdCountThreshold < HotCountThreshold &&
+         "Cold count threshold should be less than hot count threshold!");
   if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
     HasHugeWorkingSetSize =
         HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 24adfa3..477e477 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2682,6 +2682,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
           return getAddExpr(NewOps, PreservedFlags);
       }
     }
+
+    // Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext
+    // (B), if trunc (A) + -A + B does not unsigned-wrap.
+    const SCEVAddExpr *InnerAdd;
+    if (match(B, m_scev_ZExt(m_scev_Add(InnerAdd)))) {
+      const SCEV *NarrowA = getTruncateExpr(A, InnerAdd->getType());
+      if (NarrowA == getNegativeSCEV(InnerAdd->getOperand(0)) &&
+          getZeroExtendExpr(NarrowA, B->getType()) == A &&
+          hasFlags(StrengthenNoWrapFlags(this, scAddExpr, {NarrowA, InnerAdd},
+                                         SCEV::FlagAnyWrap),
+                   SCEV::FlagNUW)) {
+        return getZeroExtendExpr(getAddExpr(NarrowA, InnerAdd), B->getType());
+      }
+    }
   }

   // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
@@ -11418,8 +11432,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
     XNonConstOp = X;
     XFlagsPresent = ExpectedFlags;
   }
-  if (!isa<SCEVConstant>(XConstOp) ||
-      (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
+  if (!isa<SCEVConstant>(XConstOp))
     return false;

   if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
@@ -11428,13 +11441,21 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
     YFlagsPresent = ExpectedFlags;
   }

-  if (!isa<SCEVConstant>(YConstOp) ||
-      (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+  if (YNonConstOp != XNonConstOp)
     return false;

-  if (YNonConstOp != XNonConstOp)
+  if (!isa<SCEVConstant>(YConstOp))
     return false;

+  // When matching ADDs with NUW flags (and unsigned predicates), only the
+  // second ADD (with the larger constant) requires NUW.
+  if ((YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+    return false;
+  if (ExpectedFlags != SCEV::FlagNUW &&
+      (XFlagsPresent & ExpectedFlags) != ExpectedFlags) {
+    return false;
+  }
+
   OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
   OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();

@@ -11472,7 +11493,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
       std::swap(LHS, RHS);
     [[fallthrough]];
   case ICmpInst::ICMP_ULE:
-    // (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
+    // (X + C1) u<= (X + C2)<nuw> for C1 u<= C2.
     if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ule(C2))
       return true;

@@ -11482,7 +11503,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
       std::swap(LHS, RHS);
     [[fallthrough]];
   case ICmpInst::ICMP_ULT:
-    // (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
+    // (X + C1) u< (X + C2)<nuw> if C1 u< C2.
     if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ult(C2))
       return true;
     break;
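The isKnownPredicateViaNoOverflow change above drops the NUW requirement on the add with the smaller constant for the unsigned predicates. A tiny brute-force check of the underlying arithmetic fact (hypothetical standalone harness, not part of the patch):

```c++
// If X + C2 is known not to wrap unsigned and C1 <= C2, then X + C1 cannot
// wrap either, so (X + C1) u<= (X + C2) holds without NUW on the first add.
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const uint32_t Max = std::numeric_limits<uint32_t>::max();
  for (uint32_t X = Max - 16; X != 0; ++X)   // wraps to 0 and stops
    for (uint32_t C2 = 0; C2 <= 16; ++C2)
      for (uint32_t C1 = 0; C1 <= C2; ++C1)
        if (X + C2 >= X)                     // stand-in for the <nuw> fact on X + C2
          assert(X + C1 <= X + C2);
  return 0;
}
```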
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 21f54c7..34a7a04 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -63,10 +63,7 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI,
 // markers has the same size and points to the alloca start.
 static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
                                             const DataLayout &DL) {
-  const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
-  if (!AI)
-    return nullptr;
-
+  const AllocaInst *AI = cast<AllocaInst>(II.getArgOperand(1));
   auto AllocaSize = AI->getAllocationSize(DL);
   if (!AllocaSize)
     return nullptr;
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e475be2..6e92766 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -875,6 +875,34 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_toascii);
   }

+  if (T.isOSFreeBSD()) {
+    TLI.setAvailable(LibFunc_dunder_strtok_r);
+    TLI.setAvailable(LibFunc_memalign);
+    TLI.setAvailable(LibFunc_fputc_unlocked);
+    TLI.setAvailable(LibFunc_fputs_unlocked);
+    TLI.setAvailable(LibFunc_fread_unlocked);
+    TLI.setAvailable(LibFunc_fwrite_unlocked);
+    TLI.setAvailable(LibFunc_getc_unlocked);
+    TLI.setAvailable(LibFunc_getchar_unlocked);
+    TLI.setAvailable(LibFunc_putc_unlocked);
+    TLI.setAvailable(LibFunc_putchar_unlocked);
+
+    TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+    TLI.setUnavailable(LibFunc___kmpc_free_shared);
+    TLI.setUnavailable(LibFunc_dunder_strndup);
+    TLI.setUnavailable(LibFunc_memccpy_chk);
+    TLI.setUnavailable(LibFunc_strlen_chk);
+    TLI.setUnavailable(LibFunc_fmaximum_num);
+    TLI.setUnavailable(LibFunc_fmaximum_numf);
+    TLI.setUnavailable(LibFunc_fmaximum_numl);
+    TLI.setUnavailable(LibFunc_fminimum_num);
+    TLI.setUnavailable(LibFunc_fminimum_numf);
+    TLI.setUnavailable(LibFunc_fminimum_numl);
+    TLI.setUnavailable(LibFunc_roundeven);
+    TLI.setUnavailable(LibFunc_roundevenf);
+    TLI.setUnavailable(LibFunc_roundevenl);
+  }
+
   // As currently implemented in clang, NVPTX code has no standard library to
   // speak of. Headers provide a standard-ish library implementation, but many
   // of the signatures are wrong -- for example, many libm functions are not
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8a470eb..c7eb2ec 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1423,7 +1423,7 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
   return TTIImpl->hasArmWideBranch(Thumb);
 }

-uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const {
+APInt TargetTransformInfo::getFeatureMask(const Function &F) const {
   return TTIImpl->getFeatureMask(F);
 }

@@ -1486,6 +1486,10 @@ void TargetTransformInfo::collectKernelLaunchBounds(
   return TTIImpl->collectKernelLaunchBounds(F, LB);
 }

+bool TargetTransformInfo::allowVectorElementIndexingUsingGEP() const {
+  return TTIImpl->allowVectorElementIndexingUsingGEP();
+}
+
 TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;

 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index c871070..7025b83 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -525,6 +525,8 @@ AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
   Result.TBAAStruct = nullptr;
   Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
   Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+  Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+      NoAliasAddrSpace, Other.NoAliasAddrSpace);
   return Result;
 }

@@ -533,6 +535,8 @@ AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
   Result.TBAA = Result.TBAAStruct = nullptr;
   Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
   Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+  Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+      NoAliasAddrSpace, Other.NoAliasAddrSpace);
   return Result;
 }
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 15107c2..2e4063f 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -178,6 +178,7 @@ bool UniformityInfoWrapperPass::runOnFunction(Function &F) {
 void UniformityInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
   OS << "UniformityInfo for function '" << m_function->getName() << "':\n";
+  m_uniformityInfo.print(OS);
 }

 void UniformityInfoWrapperPass::releaseMemory() {
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 1b3da59..b3b4c37 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -240,30 +240,6 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
   return Intrinsic::not_intrinsic;
 }

-struct InterleaveIntrinsic {
-  Intrinsic::ID Interleave, Deinterleave;
-};
-
-static InterleaveIntrinsic InterleaveIntrinsics[] = {
-    {Intrinsic::vector_interleave2, Intrinsic::vector_deinterleave2},
-    {Intrinsic::vector_interleave3, Intrinsic::vector_deinterleave3},
-    {Intrinsic::vector_interleave4, Intrinsic::vector_deinterleave4},
-    {Intrinsic::vector_interleave5, Intrinsic::vector_deinterleave5},
-    {Intrinsic::vector_interleave6, Intrinsic::vector_deinterleave6},
-    {Intrinsic::vector_interleave7, Intrinsic::vector_deinterleave7},
-    {Intrinsic::vector_interleave8, Intrinsic::vector_deinterleave8},
-};
-
-Intrinsic::ID llvm::getInterleaveIntrinsicID(unsigned Factor) {
-  assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
-  return InterleaveIntrinsics[Factor - 2].Interleave;
-}
-
-Intrinsic::ID llvm::getDeinterleaveIntrinsicID(unsigned Factor) {
-  assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
-  return InterleaveIntrinsics[Factor - 2].Deinterleave;
-}
-
 unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
   switch (ID) {
   case Intrinsic::vector_interleave2:
@@ -1141,7 +1117,7 @@ Constant *
 llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                            const InterleaveGroup<Instruction> &Group) {
   // All 1's means mask is not needed.
-  if (Group.getNumMembers() == Group.getFactor())
+  if (Group.isFull())
     return nullptr;

   // TODO: support reversed access.
@@ -1687,7 +1663,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
     // Case 1: A full group. Can Skip the checks; For full groups, if the wide
     // load would wrap around the address space we would do a memory access at
     // nullptr even without the transformation.
-    if (Group->getNumMembers() == Group->getFactor())
+    if (Group->isFull())
      continue;

     // Case 2: If first and last members of the group don't wrap this implies
@@ -1722,7 +1698,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
     // Case 1: A full group. Can Skip the checks; For full groups, if the wide
     // store would wrap around the address space we would do a memory access at
     // nullptr even without the transformation.
-    if (Group->getNumMembers() == Group->getFactor())
+    if (Group->isFull())
      continue;

     // Interleave-store-group with gaps is implemented using masked wide store.