diff options
Diffstat (limited to 'llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 137 |
1 files changed, 123 insertions, 14 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 94b9fe9..212b3bf 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -188,9 +188,90 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( Members.push_back(Index); } +/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise +/// return nullptr. \p A and \p B must have the same type. +static const SCEV *addSCEVOverflow(const SCEV *A, const SCEV *B, + ScalarEvolution &SE) { + if (!SE.willNotOverflow(Instruction::Add, false, A, B)) + return nullptr; + return SE.getAddExpr(A, B); +} + +/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise +/// return nullptr. \p A and \p B must have the same type. +static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B, + ScalarEvolution &SE) { + if (!SE.willNotOverflow(Instruction::Mul, false, A, B)) + return nullptr; + return SE.getMulExpr(A, B); +} + +/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at +/// \p MaxBTC is guaranteed inbounds of the accessed object. +static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, + const SCEV *MaxBTC, + const SCEV *EltSize, + ScalarEvolution &SE, + const DataLayout &DL) { + auto *PointerBase = SE.getPointerBase(AR->getStart()); + auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase); + if (!StartPtr) + return false; + bool CheckForNonNull, CheckForFreed; + uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes( + DL, CheckForNonNull, CheckForFreed); + + if (CheckForNonNull || CheckForFreed) + return false; + + const SCEV *Step = AR->getStepRecurrence(SE); + bool IsKnownNonNegative = SE.isKnownNonNegative(Step); + if (!IsKnownNonNegative && !SE.isKnownNegative(Step)) + return false; + + Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); + Step = SE.getNoopOrSignExtend(Step, WiderTy); + MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy); + + // For the computations below, make sure they don't unsigned wrap. + if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr)) + return false; + const SCEV *StartOffset = SE.getNoopOrZeroExtend( + SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy); + + const SCEV *OffsetAtLastIter = + mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, false), SE); + if (!OffsetAtLastIter) + return false; + + const SCEV *OffsetEndBytes = addSCEVOverflow( + OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE); + if (!OffsetEndBytes) + return false; + + if (IsKnownNonNegative) { + // For positive steps, check if + // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes, + // while making sure none of the computations unsigned wrap themselves. + const SCEV *EndBytes = addSCEVOverflow(StartOffset, OffsetEndBytes, SE); + if (!EndBytes) + return false; + return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, + SE.getConstant(WiderTy, DerefBytes)); + } + + // For negative steps check if + // * StartOffset >= (MaxBTC * Step + EltSize) + // * StartOffset <= DerefBytes. + assert(SE.isKnownNegative(Step) && "must be known negative"); + return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) && + SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, + SE.getConstant(WiderTy, DerefBytes)); +} + std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( - const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount, - ScalarEvolution *SE, + const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, + const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap<std::pair<const SCEV *, Type *>, std::pair<const SCEV *, const SCEV *>> *PointerBounds) { std::pair<const SCEV *, const SCEV *> *PtrBoundsPair; @@ -206,11 +287,37 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( const SCEV *ScStart; const SCEV *ScEnd; + auto &DL = Lp->getHeader()->getDataLayout(); + Type *IdxTy = DL.getIndexType(PtrExpr->getType()); + const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy); if (SE->isLoopInvariant(PtrExpr, Lp)) { ScStart = ScEnd = PtrExpr; } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) { ScStart = AR->getStart(); - ScEnd = AR->evaluateAtIteration(MaxBECount, *SE); + if (!isa<SCEVCouldNotCompute>(BTC)) + // Evaluating AR at an exact BTC is safe: LAA separately checks that + // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then + // the loop either triggers UB when executing a memory access with a + // poison pointer or the wrapping/poisoned pointer is not used. + ScEnd = AR->evaluateAtIteration(BTC, *SE); + else { + // Evaluating AR at MaxBTC may wrap and create an expression that is less + // than the start of the AddRec due to wrapping (for example consider + // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd + // will get incremented by EltSize before returning, so this effectively + // sets ScEnd to the maximum unsigned value for the type. Note that LAA + // separately checks that accesses cannot not wrap, so unsigned max + // represents an upper bound. + if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, + DL)) { + ScEnd = AR->evaluateAtIteration(MaxBTC, *SE); + } else { + ScEnd = SE->getAddExpr( + SE->getNegativeSCEV(EltSizeSCEV), + SE->getSCEV(ConstantExpr::getIntToPtr( + ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType()))); + } + } const SCEV *Step = AR->getStepRecurrence(*SE); // For expressions with negative step, the upper bound is ScStart and the @@ -232,9 +339,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant"); // Add the size of the pointed element to ScEnd. - auto &DL = Lp->getHeader()->getDataLayout(); - Type *IdxTy = DL.getIndexType(PtrExpr->getType()); - const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy); ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd}; @@ -250,9 +354,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze) { - const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount(); - const auto &[ScStart, ScEnd] = getStartAndEndForAccess( - Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds()); + const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); + const SCEV *BTC = PSE.getBackedgeTakenCount(); + const auto &[ScStart, ScEnd] = + getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, + PSE.getSE(), &DC.getPointerBounds()); assert(!isa<SCEVCouldNotCompute>(ScStart) && !isa<SCEVCouldNotCompute>(ScEnd) && "must be able to compute both start and end expressions"); @@ -1907,11 +2013,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( // required for correctness. if (SE.isLoopInvariant(Src, InnermostLoop) || SE.isLoopInvariant(Sink, InnermostLoop)) { - const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount(); - const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess( - InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds); - const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess( - InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds); + const SCEV *BTC = PSE.getBackedgeTakenCount(); + const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); + const auto &[SrcStart_, SrcEnd_] = + getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, + PSE.getSE(), &PointerBounds); + const auto &[SinkStart_, SinkEnd_] = + getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, + PSE.getSE(), &PointerBounds); if (!isa<SCEVCouldNotCompute>(SrcStart_) && !isa<SCEVCouldNotCompute>(SrcEnd_) && !isa<SCEVCouldNotCompute>(SinkStart_) && |