diff options
Diffstat (limited to 'clang/lib/Analysis/LifetimeSafety.cpp')
-rw-r--r-- | clang/lib/Analysis/LifetimeSafety.cpp | 311 |
1 files changed, 256 insertions, 55 deletions
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index e3a03cf..94b8197 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -23,9 +23,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" #include <cstdint> +#include <memory> -namespace clang { -namespace { +namespace clang::lifetimes { +namespace internal { /// Represents the storage location being borrowed, e.g., a specific stack /// variable. @@ -36,32 +37,6 @@ struct AccessPath { AccessPath(const clang::ValueDecl *D) : D(D) {} }; -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template <typename Tag> struct ID { - uint32_t Value = 0; - - bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } - bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } - bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } - ID<Tag> operator++(int) { - ID<Tag> Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -template <typename Tag> -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) { - return OS << ID.Value; -} - -using LoanID = ID<struct LoanTag>; -using OriginID = ID<struct OriginTag>; - /// Information about a single borrow, or "Loan". A loan is created when a /// reference or pointer is created. struct Loan { @@ -223,7 +198,9 @@ public: /// An origin is propagated from a source to a destination (e.g., p = q). AssignOrigin, /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin + ReturnOfOrigin, + /// A marker for a specific point in the code, for testing. + TestPoint, }; private: @@ -310,6 +287,24 @@ public: } }; +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. +class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS) const override { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; + } +}; + class FactManager { public: llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { @@ -363,6 +358,7 @@ private: }; class FactGenerator : public ConstStmtVisitor<FactGenerator> { + using Base = ConstStmtVisitor<FactGenerator>; public: FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) @@ -458,6 +454,15 @@ public: } } + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (VisitTestPoint(FCE)) + return; + // Visit as normal otherwise. + Base::VisitCXXFunctionalCastExpr(FCE); + } + private: // Check if a type has an origin. bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } @@ -491,6 +496,27 @@ private: } } + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact<TestPointFact>(Annotation)); + return true; + } + } + return false; + } + FactManager &FactMgr; AnalysisDeclContext &AC; llvm::SmallVector<Fact *> CurrentBlockFacts; @@ -502,6 +528,13 @@ private: enum class Direction { Forward, Backward }; +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + /// A generic, policy-based driver for dataflow analyses. It combines /// the dataflow runner and the transferer logic into a single class hierarchy. /// @@ -524,14 +557,20 @@ template <typename Derived, typename LatticeType, Direction Dir> class DataflowAnalysis { public: using Lattice = LatticeType; - using Base = DataflowAnalysis<Derived, LatticeType, Dir>; + using Base = DataflowAnalysis<Derived, Lattice, Dir>; private: const CFG &Cfg; AnalysisDeclContext &AC; + /// The dataflow state before a basic block is processed. llvm::DenseMap<const CFGBlock *, Lattice> InStates; + /// The dataflow state after a basic block is processed. llvm::DenseMap<const CFGBlock *, Lattice> OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; static constexpr bool isForward() { return Dir == Direction::Forward; } @@ -577,6 +616,8 @@ public: } } + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); } Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } @@ -590,18 +631,23 @@ public: getOutState(&B).dump(llvm::dbgs()); } +private: /// Computes the state at one end of a block by applying all its facts /// sequentially to a given state from the other end. - /// TODO: We might need to store intermediate states per-fact in the block for - /// later analysis. Lattice transferBlock(const CFGBlock *Block, Lattice State) { auto Facts = AllFacts.getFacts(Block); - if constexpr (isForward()) - for (const Fact *F : Facts) + if constexpr (isForward()) { + for (const Fact *F : Facts) { State = transferFact(State, F); - else - for (const Fact *F : llvm::reverse(Facts)) + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. + PerPointStates[F] = State; State = transferFact(State, F); + } + } return State; } @@ -617,6 +663,8 @@ public: return D->transfer(In, *F->getAs<AssignOriginFact>()); case Fact::Kind::ReturnOfOrigin: return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs<TestPointFact>()); } llvm_unreachable("Unknown fact kind"); } @@ -626,14 +674,16 @@ public: Lattice transfer(Lattice In, const ExpireFact &) { return In; } Lattice transfer(Lattice In, const AssignOriginFact &) { return In; } Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } }; namespace utils { /// Computes the union of two ImmutableSets. template <typename T> -llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, llvm::ImmutableSet<T> B, - typename llvm::ImmutableSet<T>::Factory &F) { +static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, + llvm::ImmutableSet<T> B, + typename llvm::ImmutableSet<T>::Factory &F) { if (A.getHeight() < B.getHeight()) std::swap(A, B); for (const T &E : B) @@ -646,7 +696,7 @@ llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, llvm::ImmutableSet<T> B, // efficient merge could be implemented using a Patricia Trie or HAMT // instead of the current AVL-tree-based ImmutableMap. template <typename K, typename V, typename Joiner> -llvm::ImmutableMap<K, V> +static llvm::ImmutableMap<K, V> join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B, typename llvm::ImmutableMap<K, V>::Factory &F, Joiner joinValues) { if (A.getHeight() < B.getHeight()) @@ -670,10 +720,6 @@ join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B, // Loan Propagation Analysis // ========================================================================= // -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. -using LoanSet = llvm::ImmutableSet<LoanID>; using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; /// An object to hold the factories for immutable collections, ensuring @@ -769,6 +815,10 @@ public: Factory.OriginMapFactory.add(In.Origins, DestOID, SrcLoans)); } + LoanSet getLoans(OriginID OID, ProgramPoint P) { + return getLoans(getState(P), OID); + } + private: LoanSet getLoans(Lattice L, OriginID OID) { if (auto *Loans = L.Origins.lookup(OID)) @@ -778,23 +828,118 @@ private: }; // ========================================================================= // +// Expired Loans Analysis +// ========================================================================= // + +/// The dataflow lattice for tracking the set of expired loans. +struct ExpiredLattice { + LoanSet Expired; + + ExpiredLattice() : Expired(nullptr) {}; + explicit ExpiredLattice(LoanSet S) : Expired(S) {} + + bool operator==(const ExpiredLattice &Other) const { + return Expired == Other.Expired; + } + bool operator!=(const ExpiredLattice &Other) const { + return !(*this == Other); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "ExpiredLattice State:\n"; + if (Expired.isEmpty()) + OS << " <empty>\n"; + for (const LoanID &LID : Expired) + OS << " Loan " << LID << " is expired\n"; + } +}; + +/// The analysis that tracks which loans have expired. +class ExpiredLoansAnalysis + : public DataflowAnalysis<ExpiredLoansAnalysis, ExpiredLattice, + Direction::Forward> { + + LoanSet::Factory &Factory; + +public: + ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LifetimeFactory &Factory) + : DataflowAnalysis(C, AC, F), Factory(Factory.LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "ExpiredLoans"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptySet()); } + + /// Merges two lattices by taking the union of the expired loan sets. + Lattice join(Lattice L1, Lattice L2) const { + return Lattice(utils::join(L1.Expired, L2.Expired, Factory)); + } + + Lattice transfer(Lattice In, const ExpireFact &F) { + return Lattice(Factory.add(In.Expired, F.getLoanID())); + } + + // Removes the loan from the set of expired loans. + // + // When a loan is re-issued (e.g., in a loop), it is no longer considered + // expired. A loan can be in the expired set at the point of issue due to + // the dataflow state from a previous loop iteration being propagated along + // a backedge in the CFG. + // + // Note: This has a subtle false-negative though where a loan from previous + // iteration is not overwritten by a reissue. This needs careful tracking + // of loans "across iterations" which can be considered for future + // enhancements. + // + // void foo(int safe) { + // int* p = &safe; + // int* q = &safe; + // while (condition()) { + // int x = 1; + // p = &x; // A loan to 'x' is issued to 'p' in every iteration. + // if (condition()) { + // q = p; + // } + // (void)*p; // OK — 'p' points to 'x' from new iteration. + // (void)*q; // UaF - 'q' still points to 'x' from previous iteration + // // which is now destroyed. + // } + // } + Lattice transfer(Lattice In, const IssueFact &F) { + return Lattice(Factory.remove(In.Expired, F.getLoanID())); + } +}; + +// ========================================================================= // // TODO: -// - Modifying loan propagation to answer `LoanSet getLoans(Origin O, Point P)` // - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)` // - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)` // - Using the above three to perform the final error reporting. // ========================================================================= // -} // anonymous namespace -void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC) { +// ========================================================================= // +// LifetimeSafetyAnalysis Class Implementation +// ========================================================================= // + +// We need this here for unique_ptr with forward declared class. +LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC) + : AC(AC), Factory(std::make_unique<LifetimeFactory>()), + FactMgr(std::make_unique<FactManager>()) {} + +void LifetimeSafetyAnalysis::run() { llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), /*ShowColors=*/true)); - FactManager FactMgr; - FactGenerator FactGen(FactMgr, AC); + + FactGenerator FactGen(*FactMgr, AC); FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); /// TODO(opt): Consider optimizing individual blocks before running the /// dataflow analysis. @@ -805,9 +950,65 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, /// blocks; only Decls are visible. Therefore, loans in a block that /// never reach an Origin associated with a Decl can be safely dropped by /// the analysis. - LifetimeFactory Factory; - LoanPropagationAnalysis LoanPropagation(Cfg, AC, FactMgr, Factory); - LoanPropagation.run(); - DEBUG_WITH_TYPE("LifetimeLoanPropagation", LoanPropagation.dump()); + LoanPropagation = + std::make_unique<LoanPropagationAnalysis>(Cfg, AC, *FactMgr, *Factory); + LoanPropagation->run(); + + ExpiredLoans = + std::make_unique<ExpiredLoansAnalysis>(Cfg, AC, *FactMgr, *Factory); + ExpiredLoans->run(); +} + +LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, + ProgramPoint PP) const { + assert(LoanPropagation && "Analysis has not been run."); + return LoanPropagation->getLoans(OID, PP); +} + +LoanSet LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const { + assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run."); + return ExpiredLoans->getState(PP).Expired; +} + +std::optional<OriginID> +LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { + assert(FactMgr && "FactManager not initialized"); + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` logic + // in a const-query context if that becomes an issue. + return FactMgr->getOriginMgr().get(*D); +} + +std::vector<LoanID> +LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { + assert(FactMgr && "FactManager not initialized"); + std::vector<LoanID> Result; + for (const Loan &L : FactMgr->getLoanMgr().getLoans()) + if (L.Path.D == VD) + Result.push_back(L.ID); + return Result; +} + +llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const { + assert(FactMgr && "FactManager not initialized"); + llvm::StringMap<ProgramPoint> AnnotationToPointMap; + for (const CFGBlock *Block : *AC.getCFG()) { + for (const Fact *F : FactMgr->getFacts(Block)) { + if (const auto *TPF = F->getAs<TestPointFact>()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC) { + internal::LifetimeSafetyAnalysis Analysis(AC); + Analysis.run(); } -} // namespace clang +} // namespace clang::lifetimes |