aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Analysis/LifetimeSafety.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Analysis/LifetimeSafety.cpp')
-rw-r--r--clang/lib/Analysis/LifetimeSafety.cpp1546
1 files changed, 1546 insertions, 0 deletions
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
new file mode 100644
index 0000000..6196ec3
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -0,0 +1,1546 @@
+//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/Analyses/LifetimeAnnotations.h"
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <cstdint>
+#include <memory>
+#include <optional>
+
+namespace clang::lifetimes {
+namespace internal {
+
+/// Represents the storage location being borrowed, e.g., a specific stack
+/// variable.
+/// TODO: Model access paths of other types, e.g., s.field, heap and globals.
+struct AccessPath {
+ const clang::ValueDecl *D;
+
+ AccessPath(const clang::ValueDecl *D) : D(D) {}
+};
+
+/// Information about a single borrow, or "Loan". A loan is created when a
+/// reference or pointer is created.
+struct Loan {
+ /// TODO: Represent opaque loans.
+ /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
+ /// is represented as empty LoanSet
+ LoanID ID;
+ AccessPath Path;
+ /// The expression that creates the loan, e.g., &x.
+ const Expr *IssueExpr;
+
+ Loan(LoanID id, AccessPath path, const Expr *IssueExpr)
+ : ID(id), Path(path), IssueExpr(IssueExpr) {}
+
+ void dump(llvm::raw_ostream &OS) const {
+ OS << ID << " (Path: ";
+ OS << Path.D->getNameAsString() << ")";
+ }
+};
+
+/// An Origin is a symbolic identifier that represents the set of possible
+/// loans a pointer-like object could hold at any given time.
+/// TODO: Enhance the origin model to handle complex types, pointer
+/// indirection and reborrowing. The plan is to move from a single origin per
+/// variable/expression to a "list of origins" governed by the Type.
+/// For example, the type 'int**' would have two origins.
+/// See discussion:
+/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238
+struct Origin {
+ OriginID ID;
+ /// A pointer to the AST node that this origin represents. This union
+ /// distinguishes between origins from declarations (variables or parameters)
+ /// and origins from expressions.
+ llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr;
+
+ Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {}
+ Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {}
+
+ const clang::ValueDecl *getDecl() const {
+ return Ptr.dyn_cast<const clang::ValueDecl *>();
+ }
+ const clang::Expr *getExpr() const {
+ return Ptr.dyn_cast<const clang::Expr *>();
+ }
+};
+
+/// Manages the creation, storage and retrieval of loans.
+class LoanManager {
+public:
+ LoanManager() = default;
+
+ Loan &addLoan(AccessPath Path, const Expr *IssueExpr) {
+ AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr);
+ return AllLoans.back();
+ }
+
+ const Loan &getLoan(LoanID ID) const {
+ assert(ID.Value < AllLoans.size());
+ return AllLoans[ID.Value];
+ }
+ llvm::ArrayRef<Loan> getLoans() const { return AllLoans; }
+
+private:
+ LoanID getNextLoanID() { return NextLoanID++; }
+
+ LoanID NextLoanID{0};
+ /// TODO(opt): Profile and evaluate the usefullness of small buffer
+ /// optimisation.
+ llvm::SmallVector<Loan> AllLoans;
+};
+
+/// Manages the creation, storage, and retrieval of origins for pointer-like
+/// variables and expressions.
+class OriginManager {
+public:
+ OriginManager() = default;
+
+ Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) {
+ AllOrigins.emplace_back(ID, &D);
+ return AllOrigins.back();
+ }
+ Origin &addOrigin(OriginID ID, const clang::Expr &E) {
+ AllOrigins.emplace_back(ID, &E);
+ return AllOrigins.back();
+ }
+
+ // TODO: Mark this method as const once we remove the call to getOrCreate.
+ OriginID get(const Expr &E) {
+ auto It = ExprToOriginID.find(&E);
+ if (It != ExprToOriginID.end())
+ return It->second;
+ // If the expression itself has no specific origin, and it's a reference
+ // to a declaration, its origin is that of the declaration it refers to.
+ // For pointer types, where we don't pre-emptively create an origin for the
+ // DeclRefExpr itself.
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(&E))
+ return get(*DRE->getDecl());
+ // TODO: This should be an assert(It != ExprToOriginID.end()). The current
+ // implementation falls back to getOrCreate to avoid crashing on
+ // yet-unhandled pointer expressions, creating an empty origin for them.
+ return getOrCreate(E);
+ }
+
+ OriginID get(const ValueDecl &D) {
+ auto It = DeclToOriginID.find(&D);
+ // TODO: This should be an assert(It != DeclToOriginID.end()). The current
+ // implementation falls back to getOrCreate to avoid crashing on
+ // yet-unhandled pointer expressions, creating an empty origin for them.
+ if (It == DeclToOriginID.end())
+ return getOrCreate(D);
+
+ return It->second;
+ }
+
+ OriginID getOrCreate(const Expr &E) {
+ auto It = ExprToOriginID.find(&E);
+ if (It != ExprToOriginID.end())
+ return It->second;
+
+ OriginID NewID = getNextOriginID();
+ addOrigin(NewID, E);
+ ExprToOriginID[&E] = NewID;
+ return NewID;
+ }
+
+ const Origin &getOrigin(OriginID ID) const {
+ assert(ID.Value < AllOrigins.size());
+ return AllOrigins[ID.Value];
+ }
+
+ llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; }
+
+ OriginID getOrCreate(const ValueDecl &D) {
+ auto It = DeclToOriginID.find(&D);
+ if (It != DeclToOriginID.end())
+ return It->second;
+ OriginID NewID = getNextOriginID();
+ addOrigin(NewID, D);
+ DeclToOriginID[&D] = NewID;
+ return NewID;
+ }
+
+ void dump(OriginID OID, llvm::raw_ostream &OS) const {
+ OS << OID << " (";
+ Origin O = getOrigin(OID);
+ if (const ValueDecl *VD = O.getDecl())
+ OS << "Decl: " << VD->getNameAsString();
+ else if (const Expr *E = O.getExpr())
+ OS << "Expr: " << E->getStmtClassName();
+ else
+ OS << "Unknown";
+ OS << ")";
+ }
+
+private:
+ OriginID getNextOriginID() { return NextOriginID++; }
+
+ OriginID NextOriginID{0};
+ /// TODO(opt): Profile and evaluate the usefullness of small buffer
+ /// optimisation.
+ llvm::SmallVector<Origin> AllOrigins;
+ llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
+ llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
+};
+
+/// An abstract base class for a single, atomic lifetime-relevant event.
+class Fact {
+
+public:
+ enum class Kind : uint8_t {
+ /// A new loan is issued from a borrow expression (e.g., &x).
+ Issue,
+ /// A loan expires as its underlying storage is freed (e.g., variable goes
+ /// out of scope).
+ Expire,
+ /// An origin is propagated from a source to a destination (e.g., p = q).
+ /// This can also optionally kill the destination origin before flowing into
+ /// it. Otherwise, the source's loan set is merged into the destination's
+ /// loan set.
+ OriginFlow,
+ /// An origin escapes the function by flowing into the return value.
+ ReturnOfOrigin,
+ /// An origin is used (eg. appears as l-value expression like DeclRefExpr).
+ Use,
+ /// A marker for a specific point in the code, for testing.
+ TestPoint,
+ };
+
+private:
+ Kind K;
+
+protected:
+ Fact(Kind K) : K(K) {}
+
+public:
+ virtual ~Fact() = default;
+ Kind getKind() const { return K; }
+
+ template <typename T> const T *getAs() const {
+ if (T::classof(this))
+ return static_cast<const T *>(this);
+ return nullptr;
+ }
+
+ virtual void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &) const {
+ OS << "Fact (Kind: " << static_cast<int>(K) << ")\n";
+ }
+};
+
+class IssueFact : public Fact {
+ LoanID LID;
+ OriginID OID;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
+
+ IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
+ LoanID getLoanID() const { return LID; }
+ OriginID getOriginID() const { return OID; }
+ void dump(llvm::raw_ostream &OS, const LoanManager &LM,
+ const OriginManager &OM) const override {
+ OS << "Issue (";
+ LM.getLoan(getLoanID()).dump(OS);
+ OS << ", ToOrigin: ";
+ OM.dump(getOriginID(), OS);
+ OS << ")\n";
+ }
+};
+
+class ExpireFact : public Fact {
+ LoanID LID;
+ SourceLocation ExpiryLoc;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
+
+ ExpireFact(LoanID LID, SourceLocation ExpiryLoc)
+ : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {}
+
+ LoanID getLoanID() const { return LID; }
+ SourceLocation getExpiryLoc() const { return ExpiryLoc; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &LM,
+ const OriginManager &) const override {
+ OS << "Expire (";
+ LM.getLoan(getLoanID()).dump(OS);
+ OS << ")\n";
+ }
+};
+
+class OriginFlowFact : public Fact {
+ OriginID OIDDest;
+ OriginID OIDSrc;
+ // True if the destination origin should be killed (i.e., its current loans
+ // cleared) before the source origin's loans are flowed into it.
+ bool KillDest;
+
+public:
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::OriginFlow;
+ }
+
+ OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest)
+ : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc),
+ KillDest(KillDest) {}
+
+ OriginID getDestOriginID() const { return OIDDest; }
+ OriginID getSrcOriginID() const { return OIDSrc; }
+ bool getKillDest() const { return KillDest; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "OriginFlow (Dest: ";
+ OM.dump(getDestOriginID(), OS);
+ OS << ", Src: ";
+ OM.dump(getSrcOriginID(), OS);
+ OS << (getKillDest() ? "" : ", Merge");
+ OS << ")\n";
+ }
+};
+
+class ReturnOfOriginFact : public Fact {
+ OriginID OID;
+
+public:
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::ReturnOfOrigin;
+ }
+
+ ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
+ OriginID getReturnedOriginID() const { return OID; }
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "ReturnOfOrigin (";
+ OM.dump(getReturnedOriginID(), OS);
+ OS << ")\n";
+ }
+};
+
+class UseFact : public Fact {
+ const Expr *UseExpr;
+ // True if this use is a write operation (e.g., left-hand side of assignment).
+ // Write operations are exempted from use-after-free checks.
+ bool IsWritten = false;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Use; }
+
+ UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {}
+
+ OriginID getUsedOrigin(const OriginManager &OM) const {
+ // TODO: Remove const cast and make OriginManager::get as const.
+ return const_cast<OriginManager &>(OM).get(*UseExpr);
+ }
+ const Expr *getUseExpr() const { return UseExpr; }
+ void markAsWritten() { IsWritten = true; }
+ bool isWritten() const { return IsWritten; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "Use (";
+ OM.dump(getUsedOrigin(OM), OS);
+ OS << ", " << (isWritten() ? "Write" : "Read") << ")\n";
+ }
+};
+
+/// A dummy-fact used to mark a specific point in the code for testing.
+/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast.
+class TestPointFact : public Fact {
+ StringRef Annotation;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; }
+
+ explicit TestPointFact(StringRef Annotation)
+ : Fact(Kind::TestPoint), Annotation(Annotation) {}
+
+ StringRef getAnnotation() const { return Annotation; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &) const override {
+ OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n";
+ }
+};
+
+class FactManager {
+public:
+ llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end())
+ return It->second;
+ return {};
+ }
+
+ void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
+ if (!NewFacts.empty())
+ BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
+ }
+
+ template <typename FactType, typename... Args>
+ FactType *createFact(Args &&...args) {
+ void *Mem = FactAllocator.Allocate<FactType>();
+ return new (Mem) FactType(std::forward<Args>(args)...);
+ }
+
+ void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << " Lifetime Analysis Facts:\n";
+ llvm::dbgs() << "==========================================\n";
+ if (const Decl *D = AC.getDecl())
+ if (const auto *ND = dyn_cast<NamedDecl>(D))
+ llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
+ // Print blocks in the order as they appear in code for a stable ordering.
+ for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) {
+ llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end()) {
+ for (const Fact *F : It->second) {
+ llvm::dbgs() << " ";
+ F->dump(llvm::dbgs(), LoanMgr, OriginMgr);
+ }
+ }
+ llvm::dbgs() << " End of Block\n";
+ }
+ }
+
+ LoanManager &getLoanMgr() { return LoanMgr; }
+ OriginManager &getOriginMgr() { return OriginMgr; }
+
+private:
+ LoanManager LoanMgr;
+ OriginManager OriginMgr;
+ llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
+ BlockToFactsMap;
+ llvm::BumpPtrAllocator FactAllocator;
+};
+
+class FactGenerator : public ConstStmtVisitor<FactGenerator> {
+ using Base = ConstStmtVisitor<FactGenerator>;
+
+public:
+ FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC)
+ : FactMgr(FactMgr), AC(AC) {}
+
+ void run() {
+ llvm::TimeTraceScope TimeProfile("FactGenerator");
+ // Iterate through the CFG blocks in reverse post-order to ensure that
+ // initializations and destructions are processed in the correct sequence.
+ for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) {
+ CurrentBlockFacts.clear();
+ for (unsigned I = 0; I < Block->size(); ++I) {
+ const CFGElement &Element = Block->Elements[I];
+ if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>())
+ Visit(CS->getStmt());
+ else if (std::optional<CFGAutomaticObjDtor> DtorOpt =
+ Element.getAs<CFGAutomaticObjDtor>())
+ handleDestructor(*DtorOpt);
+ }
+ FactMgr.addBlockFacts(Block, CurrentBlockFacts);
+ }
+ }
+
+ void VisitDeclStmt(const DeclStmt *DS) {
+ for (const Decl *D : DS->decls())
+ if (const auto *VD = dyn_cast<VarDecl>(D))
+ if (hasOrigin(VD))
+ if (const Expr *InitExpr = VD->getInit())
+ killAndFlowOrigin(*VD, *InitExpr);
+ }
+
+ void VisitDeclRefExpr(const DeclRefExpr *DRE) {
+ handleUse(DRE);
+ // For non-pointer/non-view types, a reference to the variable's storage
+ // is a borrow. We create a loan for it.
+ // For pointer/view types, we stick to the existing model for now and do
+ // not create an extra origin for the l-value expression itself.
+
+ // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is
+ // not sufficient to model the different levels of indirection. The current
+ // single-origin model cannot distinguish between a loan to the variable's
+ // storage and a loan to what it points to. A multi-origin model would be
+ // required for this.
+ if (!isPointerType(DRE->getType())) {
+ if (const Loan *L = createLoan(DRE)) {
+ OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE);
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<IssueFact>(L->ID, ExprOID));
+ }
+ }
+ }
+
+ void VisitCXXConstructExpr(const CXXConstructExpr *CCE) {
+ if (isGslPointerType(CCE->getType())) {
+ handleGSLPointerConstruction(CCE);
+ return;
+ }
+ }
+
+ void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) {
+ // Specifically for conversion operators,
+ // like `std::string_view p = std::string{};`
+ if (isGslPointerType(MCE->getType()) &&
+ isa<CXXConversionDecl>(MCE->getCalleeDecl())) {
+ // The argument is the implicit object itself.
+ handleFunctionCall(MCE, MCE->getMethodDecl(),
+ {MCE->getImplicitObjectArgument()},
+ /*IsGslConstruction=*/true);
+ }
+ if (const CXXMethodDecl *Method = MCE->getMethodDecl()) {
+ // Construct the argument list, with the implicit 'this' object as the
+ // first argument.
+ llvm::SmallVector<const Expr *, 4> Args;
+ Args.push_back(MCE->getImplicitObjectArgument());
+ Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs());
+
+ handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false);
+ }
+ }
+
+ void VisitCallExpr(const CallExpr *CE) {
+ handleFunctionCall(CE, CE->getDirectCallee(),
+ {CE->getArgs(), CE->getNumArgs()});
+ }
+
+ void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) {
+ /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized
+ /// pointers can use the same type of loan.
+ FactMgr.getOriginMgr().getOrCreate(*N);
+ }
+
+ void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
+ if (!hasOrigin(ICE))
+ return;
+ // An ImplicitCastExpr node itself gets an origin, which flows from the
+ // origin of its sub-expression (after stripping its own parens/casts).
+ killAndFlowOrigin(*ICE, *ICE->getSubExpr());
+ }
+
+ void VisitUnaryOperator(const UnaryOperator *UO) {
+ if (UO->getOpcode() == UO_AddrOf) {
+ const Expr *SubExpr = UO->getSubExpr();
+ // Taking address of a pointer-type expression is not yet supported and
+ // will be supported in multi-origin model.
+ if (isPointerType(SubExpr->getType()))
+ return;
+ // The origin of an address-of expression (e.g., &x) is the origin of
+ // its sub-expression (x). This fact will cause the dataflow analysis
+ // to propagate any loans held by the sub-expression's origin to the
+ // origin of this UnaryOperator expression.
+ killAndFlowOrigin(*UO, *SubExpr);
+ }
+ }
+
+ void VisitReturnStmt(const ReturnStmt *RS) {
+ if (const Expr *RetExpr = RS->getRetValue()) {
+ if (hasOrigin(RetExpr)) {
+ OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr);
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<ReturnOfOriginFact>(OID));
+ }
+ }
+ }
+
+ void VisitBinaryOperator(const BinaryOperator *BO) {
+ if (BO->isAssignmentOp())
+ handleAssignment(BO->getLHS(), BO->getRHS());
+ }
+
+ void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) {
+ // Assignment operators have special "kill-then-propagate" semantics
+ // and are handled separately.
+ if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) {
+ handleAssignment(OCE->getArg(0), OCE->getArg(1));
+ return;
+ }
+ handleFunctionCall(OCE, OCE->getDirectCallee(),
+ {OCE->getArgs(), OCE->getNumArgs()},
+ /*IsGslConstruction=*/false);
+ }
+
+ void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) {
+ // Check if this is a test point marker. If so, we are done with this
+ // expression.
+ if (handleTestPoint(FCE))
+ return;
+ if (isGslPointerType(FCE->getType()))
+ killAndFlowOrigin(*FCE, *FCE->getSubExpr());
+ }
+
+ void VisitInitListExpr(const InitListExpr *ILE) {
+ if (!hasOrigin(ILE))
+ return;
+ // For list initialization with a single element, like `View{...}`, the
+ // origin of the list itself is the origin of its single element.
+ if (ILE->getNumInits() == 1)
+ killAndFlowOrigin(*ILE, *ILE->getInit(0));
+ }
+
+ void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) {
+ if (!hasOrigin(MTE))
+ return;
+ // A temporary object's origin is the same as the origin of the
+ // expression that initializes it.
+ killAndFlowOrigin(*MTE, *MTE->getSubExpr());
+ }
+
+ void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) {
+ /// TODO: Also handle trivial destructors (e.g., for `int`
+ /// variables) which will never have a CFGAutomaticObjDtor node.
+ /// TODO: Handle loans to temporaries.
+ /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the
+ /// lifetime ends.
+ const VarDecl *DestructedVD = DtorOpt.getVarDecl();
+ if (!DestructedVD)
+ return;
+ // Iterate through all loans to see if any expire.
+ /// TODO(opt): Do better than a linear search to find loans associated with
+ /// 'DestructedVD'.
+ for (const Loan &L : FactMgr.getLoanMgr().getLoans()) {
+ const AccessPath &LoanPath = L.Path;
+ // Check if the loan is for a stack variable and if that variable
+ // is the one being destructed.
+ if (LoanPath.D == DestructedVD)
+ CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(
+ L.ID, DtorOpt.getTriggerStmt()->getEndLoc()));
+ }
+ }
+
+private:
+ static bool isGslPointerType(QualType QT) {
+ if (const auto *RD = QT->getAsCXXRecordDecl()) {
+ // We need to check the template definition for specializations.
+ if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD))
+ return CTSD->getSpecializedTemplate()
+ ->getTemplatedDecl()
+ ->hasAttr<PointerAttr>();
+ return RD->hasAttr<PointerAttr>();
+ }
+ return false;
+ }
+
+ static bool isPointerType(QualType QT) {
+ return QT->isPointerOrReferenceType() || isGslPointerType(QT);
+ }
+ // Check if a type has an origin.
+ static bool hasOrigin(const Expr *E) {
+ return E->isGLValue() || isPointerType(E->getType());
+ }
+
+ static bool hasOrigin(const VarDecl *VD) {
+ return isPointerType(VD->getType());
+ }
+
+ void handleGSLPointerConstruction(const CXXConstructExpr *CCE) {
+ assert(isGslPointerType(CCE->getType()));
+ if (CCE->getNumArgs() != 1)
+ return;
+ if (hasOrigin(CCE->getArg(0)))
+ killAndFlowOrigin(*CCE, *CCE->getArg(0));
+ else
+ // This could be a new borrow.
+ handleFunctionCall(CCE, CCE->getConstructor(),
+ {CCE->getArgs(), CCE->getNumArgs()},
+ /*IsGslConstruction=*/true);
+ }
+
+ /// Checks if a call-like expression creates a borrow by passing a value to a
+ /// reference parameter, creating an IssueFact if it does.
+ /// \param IsGslConstruction True if this is a GSL construction where all
+ /// argument origins should flow to the returned origin.
+ void handleFunctionCall(const Expr *Call, const FunctionDecl *FD,
+ ArrayRef<const Expr *> Args,
+ bool IsGslConstruction = false) {
+ // Ignore functions returning values with no origin.
+ if (!FD || !hasOrigin(Call))
+ return;
+ auto IsArgLifetimeBound = [FD](unsigned I) -> bool {
+ const ParmVarDecl *PVD = nullptr;
+ if (const auto *Method = dyn_cast<CXXMethodDecl>(FD);
+ Method && Method->isInstance()) {
+ if (I == 0)
+ // For the 'this' argument, the attribute is on the method itself.
+ return implicitObjectParamIsLifetimeBound(Method);
+ if ((I - 1) < Method->getNumParams())
+ // For explicit arguments, find the corresponding parameter
+ // declaration.
+ PVD = Method->getParamDecl(I - 1);
+ } else if (I < FD->getNumParams())
+ // For free functions or static methods.
+ PVD = FD->getParamDecl(I);
+ return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false;
+ };
+ if (Args.empty())
+ return;
+ bool killedSrc = false;
+ for (unsigned I = 0; I < Args.size(); ++I)
+ if (IsGslConstruction || IsArgLifetimeBound(I)) {
+ if (!killedSrc) {
+ killedSrc = true;
+ killAndFlowOrigin(*Call, *Args[I]);
+ } else
+ flowOrigin(*Call, *Args[I]);
+ }
+ }
+
+ /// Creates a loan for the storage path of a given declaration reference.
+ /// This function should be called whenever a DeclRefExpr represents a borrow.
+ /// \param DRE The declaration reference expression that initiates the borrow.
+ /// \return The new Loan on success, nullptr otherwise.
+ const Loan *createLoan(const DeclRefExpr *DRE) {
+ if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) {
+ AccessPath Path(VD);
+ // The loan is created at the location of the DeclRefExpr.
+ return &FactMgr.getLoanMgr().addLoan(Path, DRE);
+ }
+ return nullptr;
+ }
+
+ template <typename Destination, typename Source>
+ void flowOrigin(const Destination &D, const Source &S) {
+ OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D);
+ OriginID SrcOID = FactMgr.getOriginMgr().get(S);
+ CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>(
+ DestOID, SrcOID, /*KillDest=*/false));
+ }
+
+ template <typename Destination, typename Source>
+ void killAndFlowOrigin(const Destination &D, const Source &S) {
+ OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D);
+ OriginID SrcOID = FactMgr.getOriginMgr().get(S);
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<OriginFlowFact>(DestOID, SrcOID, /*KillDest=*/true));
+ }
+
+ /// Checks if the expression is a `void("__lifetime_test_point_...")` cast.
+ /// If so, creates a `TestPointFact` and returns true.
+ bool handleTestPoint(const CXXFunctionalCastExpr *FCE) {
+ if (!FCE->getType()->isVoidType())
+ return false;
+
+ const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts();
+ if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) {
+ llvm::StringRef LiteralValue = SL->getString();
+ const std::string Prefix = "__lifetime_test_point_";
+
+ if (LiteralValue.starts_with(Prefix)) {
+ StringRef Annotation = LiteralValue.drop_front(Prefix.length());
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<TestPointFact>(Annotation));
+ return true;
+ }
+ }
+ return false;
+ }
+
+ void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
+ if (!hasOrigin(LHSExpr))
+ return;
+ // Find the underlying variable declaration for the left-hand side.
+ if (const auto *DRE_LHS =
+ dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) {
+ markUseAsWrite(DRE_LHS);
+ if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) {
+ // Kill the old loans of the destination origin and flow the new loans
+ // from the source origin.
+ killAndFlowOrigin(*VD_LHS, *RHSExpr);
+ }
+ }
+ }
+
+ // A DeclRefExpr will be treated as a use of the referenced decl. It will be
+ // checked for use-after-free unless it is later marked as being written to
+ // (e.g. on the left-hand side of an assignment).
+ void handleUse(const DeclRefExpr *DRE) {
+ if (isPointerType(DRE->getType())) {
+ UseFact *UF = FactMgr.createFact<UseFact>(DRE);
+ CurrentBlockFacts.push_back(UF);
+ assert(!UseFacts.contains(DRE));
+ UseFacts[DRE] = UF;
+ }
+ }
+
+ void markUseAsWrite(const DeclRefExpr *DRE) {
+ if (!isPointerType(DRE->getType()))
+ return;
+ assert(UseFacts.contains(DRE));
+ UseFacts[DRE]->markAsWritten();
+ }
+
+ FactManager &FactMgr;
+ AnalysisDeclContext &AC;
+ llvm::SmallVector<Fact *> CurrentBlockFacts;
+ // To distinguish between reads and writes for use-after-free checks, this map
+ // stores the `UseFact` for each `DeclRefExpr`. We initially identify all
+ // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use
+ // corresponding to the left-hand side is updated to be a "write", thereby
+ // exempting it from the check.
+ llvm::DenseMap<const DeclRefExpr *, UseFact *> UseFacts;
+};
+
+// ========================================================================= //
+// Generic Dataflow Analysis
+// ========================================================================= //
+
+enum class Direction { Forward, Backward };
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// `Fact`. identified by a lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+/// A generic, policy-based driver for dataflow analyses. It combines
+/// the dataflow runner and the transferer logic into a single class hierarchy.
+///
+/// The derived class is expected to provide:
+/// - A `Lattice` type.
+/// - `StringRef getAnalysisName() const`
+/// - `Lattice getInitialState();` The initial state of the analysis.
+/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths.
+/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single
+/// lifetime-relevant `Fact` transforms the lattice state. Only overloads
+/// for facts relevant to the analysis need to be implemented.
+///
+/// \tparam Derived The CRTP derived class that implements the specific
+/// analysis.
+/// \tparam LatticeType The dataflow lattice used by the analysis.
+/// \tparam Dir The direction of the analysis (Forward or Backward).
+/// TODO: Maybe use the dataflow framework! The framework might need changes
+/// to support the current comparison done at block-entry.
+template <typename Derived, typename LatticeType, Direction Dir>
+class DataflowAnalysis {
+public:
+ using Lattice = LatticeType;
+ using Base = DataflowAnalysis<Derived, Lattice, Dir>;
+
+private:
+ const CFG &Cfg;
+ AnalysisDeclContext &AC;
+
+ /// The dataflow state before a basic block is processed.
+ llvm::DenseMap<const CFGBlock *, Lattice> InStates;
+ /// The dataflow state after a basic block is processed.
+ llvm::DenseMap<const CFGBlock *, Lattice> OutStates;
+ /// The dataflow state at a Program Point.
+ /// In a forward analysis, this is the state after the Fact at that point has
+ /// been applied, while in a backward analysis, it is the state before.
+ llvm::DenseMap<ProgramPoint, Lattice> PerPointStates;
+
+ static constexpr bool isForward() { return Dir == Direction::Forward; }
+
+protected:
+ FactManager &AllFacts;
+
+ explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC,
+ FactManager &F)
+ : Cfg(C), AC(AC), AllFacts(F) {}
+
+public:
+ void run() {
+ Derived &D = static_cast<Derived &>(*this);
+ llvm::TimeTraceScope Time(D.getAnalysisName());
+
+ using Worklist =
+ std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist,
+ BackwardDataflowWorklist>;
+ Worklist W(Cfg, AC);
+
+ const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit();
+ InStates[Start] = D.getInitialState();
+ W.enqueueBlock(Start);
+
+ while (const CFGBlock *B = W.dequeue()) {
+ Lattice StateIn = *getInState(B);
+ Lattice StateOut = transferBlock(B, StateIn);
+ OutStates[B] = StateOut;
+ for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) {
+ if (!AdjacentB)
+ continue;
+ std::optional<Lattice> OldInState = getInState(AdjacentB);
+ Lattice NewInState =
+ !OldInState ? StateOut : D.join(*OldInState, StateOut);
+ // Enqueue the adjacent block if its in-state has changed or if we have
+ // never seen it.
+ if (!OldInState || NewInState != *OldInState) {
+ InStates[AdjacentB] = NewInState;
+ W.enqueueBlock(AdjacentB);
+ }
+ }
+ }
+ }
+
+protected:
+ Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
+
+ std::optional<Lattice> getInState(const CFGBlock *B) const {
+ auto It = InStates.find(B);
+ if (It == InStates.end())
+ return std::nullopt;
+ return It->second;
+ }
+
+ Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); }
+
+ void dump() const {
+ const Derived *D = static_cast<const Derived *>(this);
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << D->getAnalysisName() << " results:\n";
+ llvm::dbgs() << "==========================================\n";
+ const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry();
+ getOutState(&B).dump(llvm::dbgs());
+ }
+
+private:
+ /// Computes the state at one end of a block by applying all its facts
+ /// sequentially to a given state from the other end.
+ Lattice transferBlock(const CFGBlock *Block, Lattice State) {
+ auto Facts = AllFacts.getFacts(Block);
+ if constexpr (isForward()) {
+ for (const Fact *F : Facts) {
+ State = transferFact(State, F);
+ PerPointStates[F] = State;
+ }
+ } else {
+ for (const Fact *F : llvm::reverse(Facts)) {
+ // In backward analysis, capture the state before applying the fact.
+ PerPointStates[F] = State;
+ State = transferFact(State, F);
+ }
+ }
+ return State;
+ }
+
+ Lattice transferFact(Lattice In, const Fact *F) {
+ assert(F);
+ Derived *D = static_cast<Derived *>(this);
+ switch (F->getKind()) {
+ case Fact::Kind::Issue:
+ return D->transfer(In, *F->getAs<IssueFact>());
+ case Fact::Kind::Expire:
+ return D->transfer(In, *F->getAs<ExpireFact>());
+ case Fact::Kind::OriginFlow:
+ return D->transfer(In, *F->getAs<OriginFlowFact>());
+ case Fact::Kind::ReturnOfOrigin:
+ return D->transfer(In, *F->getAs<ReturnOfOriginFact>());
+ case Fact::Kind::Use:
+ return D->transfer(In, *F->getAs<UseFact>());
+ case Fact::Kind::TestPoint:
+ return D->transfer(In, *F->getAs<TestPointFact>());
+ }
+ llvm_unreachable("Unknown fact kind");
+ }
+
+public:
+ Lattice transfer(Lattice In, const IssueFact &) { return In; }
+ Lattice transfer(Lattice In, const ExpireFact &) { return In; }
+ Lattice transfer(Lattice In, const OriginFlowFact &) { return In; }
+ Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; }
+ Lattice transfer(Lattice In, const UseFact &) { return In; }
+ Lattice transfer(Lattice In, const TestPointFact &) { return In; }
+};
+
+namespace utils {
+
+/// Computes the union of two ImmutableSets.
+template <typename T>
+static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A,
+ llvm::ImmutableSet<T> B,
+ typename llvm::ImmutableSet<T>::Factory &F) {
+ if (A.getHeight() < B.getHeight())
+ std::swap(A, B);
+ for (const T &E : B)
+ A = F.add(A, E);
+ return A;
+}
+
+/// Describes the strategy for joining two `ImmutableMap` instances, primarily
+/// differing in how they handle keys that are unique to one of the maps.
+///
+/// A `Symmetric` join is universally correct, while an `Asymmetric` join
+/// serves as a performance optimization. The latter is applicable only when the
+/// join operation possesses a left identity element, allowing for a more
+/// efficient, one-sided merge.
+enum class JoinKind {
+ /// A symmetric join applies the `JoinValues` operation to keys unique to
+ /// either map, ensuring that values from both maps contribute to the result.
+ Symmetric,
+ /// An asymmetric join preserves keys unique to the first map as-is, while
+ /// applying the `JoinValues` operation only to keys unique to the second map.
+ Asymmetric,
+};
+
+/// Computes the key-wise union of two ImmutableMaps.
+// TODO(opt): This key-wise join is a performance bottleneck. A more
+// efficient merge could be implemented using a Patricia Trie or HAMT
+// instead of the current AVL-tree-based ImmutableMap.
+template <typename K, typename V, typename Joiner>
+static llvm::ImmutableMap<K, V>
+join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B,
+ typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues,
+ JoinKind Kind) {
+ if (A.getHeight() < B.getHeight())
+ return join(B, A, F, JoinValues, Kind);
+
+ // For each element in B, join it with the corresponding element in A
+ // (or with an empty value if it doesn't exist in A).
+ llvm::ImmutableMap<K, V> Res = A;
+ for (const auto &Entry : B) {
+ const K &Key = Entry.first;
+ const V &ValB = Entry.second;
+ Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB));
+ }
+ if (Kind == JoinKind::Symmetric) {
+ for (const auto &Entry : A) {
+ const K &Key = Entry.first;
+ const V &ValA = Entry.second;
+ if (!B.contains(Key))
+ Res = F.add(Res, Key, JoinValues(&ValA, nullptr));
+ }
+ }
+ return Res;
+}
+} // namespace utils
+
+// ========================================================================= //
+// Loan Propagation Analysis
+// ========================================================================= //
+
+/// Represents the dataflow lattice for loan propagation.
+///
+/// This lattice tracks which loans each origin may hold at a given program
+/// point.The lattice has a finite height: An origin's loan set is bounded by
+/// the total number of loans in the function.
+/// TODO(opt): To reduce the lattice size, propagate origins of declarations,
+/// not expressions, because expressions are not visible across blocks.
+struct LoanPropagationLattice {
+ /// The map from an origin to the set of loans it contains.
+ OriginLoanMap Origins = OriginLoanMap(nullptr);
+
+ explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {}
+ LoanPropagationLattice() = default;
+
+ bool operator==(const LoanPropagationLattice &Other) const {
+ return Origins == Other.Origins;
+ }
+ bool operator!=(const LoanPropagationLattice &Other) const {
+ return !(*this == Other);
+ }
+
+ void dump(llvm::raw_ostream &OS) const {
+ OS << "LoanPropagationLattice State:\n";
+ if (Origins.isEmpty())
+ OS << " <empty>\n";
+ for (const auto &Entry : Origins) {
+ if (Entry.second.isEmpty())
+ OS << " Origin " << Entry.first << " contains no loans\n";
+ for (const LoanID &LID : Entry.second)
+ OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
+ }
+ }
+};
+
+/// The analysis that tracks which loans belong to which origins.
+class LoanPropagationAnalysis
+ : public DataflowAnalysis<LoanPropagationAnalysis, LoanPropagationLattice,
+ Direction::Forward> {
+ OriginLoanMap::Factory &OriginLoanMapFactory;
+ LoanSet::Factory &LoanSetFactory;
+
+public:
+ LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+ OriginLoanMap::Factory &OriginLoanMapFactory,
+ LoanSet::Factory &LoanSetFactory)
+ : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory),
+ LoanSetFactory(LoanSetFactory) {}
+
+ using Base::transfer;
+
+ StringRef getAnalysisName() const { return "LoanPropagation"; }
+
+ Lattice getInitialState() { return Lattice{}; }
+
+ /// Merges two lattices by taking the union of loans for each origin.
+ // TODO(opt): Keep the state small by removing origins which become dead.
+ Lattice join(Lattice A, Lattice B) {
+ OriginLoanMap JoinedOrigins = utils::join(
+ A.Origins, B.Origins, OriginLoanMapFactory,
+ [&](const LoanSet *S1, const LoanSet *S2) {
+ assert((S1 || S2) && "unexpectedly merging 2 empty sets");
+ if (!S1)
+ return *S2;
+ if (!S2)
+ return *S1;
+ return utils::join(*S1, *S2, LoanSetFactory);
+ },
+ // Asymmetric join is a performance win. For origins present only on one
+ // branch, the loan set can be carried over as-is.
+ utils::JoinKind::Asymmetric);
+ return Lattice(JoinedOrigins);
+ }
+
+ /// A new loan is issued to the origin. Old loans are erased.
+ Lattice transfer(Lattice In, const IssueFact &F) {
+ OriginID OID = F.getOriginID();
+ LoanID LID = F.getLoanID();
+ return LoanPropagationLattice(OriginLoanMapFactory.add(
+ In.Origins, OID,
+ LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID)));
+ }
+
+ /// A flow from source to destination. If `KillDest` is true, this replaces
+ /// the destination's loans with the source's. Otherwise, the source's loans
+ /// are merged into the destination's.
+ Lattice transfer(Lattice In, const OriginFlowFact &F) {
+ OriginID DestOID = F.getDestOriginID();
+ OriginID SrcOID = F.getSrcOriginID();
+
+ LoanSet DestLoans =
+ F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID);
+ LoanSet SrcLoans = getLoans(In, SrcOID);
+ LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory);
+
+ return LoanPropagationLattice(
+ OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans));
+ }
+
+ LoanSet getLoans(OriginID OID, ProgramPoint P) const {
+ return getLoans(getState(P), OID);
+ }
+
+private:
+ LoanSet getLoans(Lattice L, OriginID OID) const {
+ if (auto *Loans = L.Origins.lookup(OID))
+ return *Loans;
+ return LoanSetFactory.getEmptySet();
+ }
+};
+
+// ========================================================================= //
+// Live Origins Analysis
+// ========================================================================= //
+//
+// A backward dataflow analysis that determines which origins are "live" at each
+// program point. An origin is "live" at a program point if there's a potential
+// future use of the pointer it represents. Liveness is "generated" by a read of
+// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being
+// live) when its loan set is overwritten (e.g. a OriginFlow killing the
+// destination origin).
+//
+// This information is used for detecting use-after-free errors, as it allows us
+// to check if a live origin holds a loan to an object that has already expired.
+// ========================================================================= //
+
+/// Information about why an origin is live at a program point.
+struct LivenessInfo {
+ /// The use that makes the origin live. If liveness is propagated from
+ /// multiple uses along different paths, this will point to the use appearing
+ /// earlier in the translation unit.
+ /// This is 'null' when the origin is not live.
+ const UseFact *CausingUseFact;
+ /// The kind of liveness of the origin.
+ /// `Must`: The origin is live on all control-flow paths from the current
+ /// point to the function's exit (i.e. the current point is dominated by a set
+ /// of uses).
+ /// `Maybe`: indicates it is live on some but not all paths.
+ ///
+ /// This determines the diagnostic's confidence level.
+ /// `Must`-be-alive at expiration implies a definite use-after-free,
+ /// while `Maybe`-be-alive suggests a potential one on some paths.
+ LivenessKind Kind;
+
+ LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {}
+ LivenessInfo(const UseFact *UF, LivenessKind K)
+ : CausingUseFact(UF), Kind(K) {}
+
+ bool operator==(const LivenessInfo &Other) const {
+ return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind;
+ }
+ bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); }
+
+ void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+ IDBuilder.AddPointer(CausingUseFact);
+ IDBuilder.Add(Kind);
+ }
+};
+
+using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>;
+
+/// The dataflow lattice for origin liveness analysis.
+/// It tracks which origins are live, why they're live (which UseFact),
+/// and the confidence level of that liveness.
+struct LivenessLattice {
+ LivenessMap LiveOrigins;
+
+ LivenessLattice() : LiveOrigins(nullptr) {};
+
+ explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {}
+
+ bool operator==(const LivenessLattice &Other) const {
+ return LiveOrigins == Other.LiveOrigins;
+ }
+
+ bool operator!=(const LivenessLattice &Other) const {
+ return !(*this == Other);
+ }
+
+ void dump(llvm::raw_ostream &OS, const OriginManager &OM) const {
+ if (LiveOrigins.isEmpty())
+ OS << " <empty>\n";
+ for (const auto &Entry : LiveOrigins) {
+ OriginID OID = Entry.first;
+ const LivenessInfo &Info = Entry.second;
+ OS << " ";
+ OM.dump(OID, OS);
+ OS << " is ";
+ switch (Info.Kind) {
+ case LivenessKind::Must:
+ OS << "definitely";
+ break;
+ case LivenessKind::Maybe:
+ OS << "maybe";
+ break;
+ case LivenessKind::Dead:
+ llvm_unreachable("liveness kind of live origins should not be dead.");
+ }
+ OS << " live at this point\n";
+ }
+ }
+};
+
+/// The analysis that tracks which origins are live, with granular information
+/// about the causing use fact and confidence level. This is a backward
+/// analysis.
+class LiveOriginAnalysis
+ : public DataflowAnalysis<LiveOriginAnalysis, LivenessLattice,
+ Direction::Backward> {
+ FactManager &FactMgr;
+ LivenessMap::Factory &Factory;
+
+public:
+ LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+ LivenessMap::Factory &SF)
+ : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {}
+ using DataflowAnalysis<LiveOriginAnalysis, Lattice,
+ Direction::Backward>::transfer;
+
+ StringRef getAnalysisName() const { return "LiveOrigins"; }
+
+ Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); }
+
+ /// Merges two lattices by combining liveness information.
+ /// When the same origin has different confidence levels, we take the lower
+ /// one.
+ Lattice join(Lattice L1, Lattice L2) const {
+ LivenessMap Merged = L1.LiveOrigins;
+ // Take the earliest UseFact to make the join hermetic and commutative.
+ auto CombineUseFact = [](const UseFact &A,
+ const UseFact &B) -> const UseFact * {
+ return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A
+ : &B;
+ };
+ auto CombineLivenessKind = [](LivenessKind K1,
+ LivenessKind K2) -> LivenessKind {
+ assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead.");
+ assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead.");
+ // Only return "Must" if both paths are "Must", otherwise Maybe.
+ if (K1 == LivenessKind::Must && K2 == LivenessKind::Must)
+ return LivenessKind::Must;
+ return LivenessKind::Maybe;
+ };
+ auto CombineLivenessInfo = [&](const LivenessInfo *L1,
+ const LivenessInfo *L2) -> LivenessInfo {
+ assert((L1 || L2) && "unexpectedly merging 2 empty sets");
+ if (!L1)
+ return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe);
+ if (!L2)
+ return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe);
+ return LivenessInfo(
+ CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact),
+ CombineLivenessKind(L1->Kind, L2->Kind));
+ };
+ return Lattice(utils::join(
+ L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo,
+ // A symmetric join is required here. If an origin is live on one
+ // branch but not the other, its confidence must be demoted to `Maybe`.
+ utils::JoinKind::Symmetric));
+ }
+
+ /// A read operation makes the origin live with definite confidence, as it
+ /// dominates this program point. A write operation kills the liveness of
+ /// the origin since it overwrites the value.
+ Lattice transfer(Lattice In, const UseFact &UF) {
+ OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr());
+ // Write kills liveness.
+ if (UF.isWritten())
+ return Lattice(Factory.remove(In.LiveOrigins, OID));
+ // Read makes origin live with definite confidence (dominates this point).
+ return Lattice(Factory.add(In.LiveOrigins, OID,
+ LivenessInfo(&UF, LivenessKind::Must)));
+ }
+
+ /// Issuing a new loan to an origin kills its liveness.
+ Lattice transfer(Lattice In, const IssueFact &IF) {
+ return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID()));
+ }
+
+ /// An OriginFlow kills the liveness of the destination origin if `KillDest`
+ /// is true. Otherwise, it propagates liveness from destination to source.
+ Lattice transfer(Lattice In, const OriginFlowFact &OF) {
+ if (!OF.getKillDest())
+ return In;
+ return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID()));
+ }
+
+ LivenessMap getLiveOrigins(ProgramPoint P) const {
+ return getState(P).LiveOrigins;
+ }
+
+ // Dump liveness values on all test points in the program.
+ void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << getAnalysisName() << " results:\n";
+ llvm::dbgs() << "==========================================\n";
+ for (const auto &Entry : LSA.getTestPoints()) {
+ OS << "TestPoint: " << Entry.getKey() << "\n";
+ getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr());
+ }
+ }
+};
+
+// ========================================================================= //
+// Lifetime checker and Error reporter
+// ========================================================================= //
+
+/// Struct to store the complete context for a potential lifetime violation.
+struct PendingWarning {
+ SourceLocation ExpiryLoc; // Where the loan expired.
+ const Expr *UseExpr; // Where the origin holding this loan was used.
+ Confidence ConfidenceLevel;
+};
+
+class LifetimeChecker {
+private:
+ llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap;
+ LoanPropagationAnalysis &LoanPropagation;
+ LiveOriginAnalysis &LiveOrigins;
+ FactManager &FactMgr;
+ AnalysisDeclContext &ADC;
+ LifetimeSafetyReporter *Reporter;
+
+public:
+ LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA,
+ FactManager &FM, AnalysisDeclContext &ADC,
+ LifetimeSafetyReporter *Reporter)
+ : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC),
+ Reporter(Reporter) {}
+
+ void run() {
+ llvm::TimeTraceScope TimeProfile("LifetimeChecker");
+ for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>())
+ for (const Fact *F : FactMgr.getFacts(B))
+ if (const auto *EF = F->getAs<ExpireFact>())
+ checkExpiry(EF);
+ issuePendingWarnings();
+ }
+
+ /// Checks for use-after-free errors when a loan expires.
+ ///
+ /// This method examines all live origins at the expiry point and determines
+ /// if any of them hold the expiring loan. If so, it creates a pending
+ /// warning with the appropriate confidence level based on the liveness
+ /// information. The confidence reflects whether the origin is definitely
+ /// or maybe live at this point.
+ ///
+ /// Note: This implementation considers only the confidence of origin
+ /// liveness. Future enhancements could also consider the confidence of loan
+ /// propagation (e.g., a loan may only be held on some execution paths).
+ void checkExpiry(const ExpireFact *EF) {
+ LoanID ExpiredLoan = EF->getLoanID();
+ LivenessMap Origins = LiveOrigins.getLiveOrigins(EF);
+ Confidence CurConfidence = Confidence::None;
+ const UseFact *BadUse = nullptr;
+ for (auto &[OID, LiveInfo] : Origins) {
+ LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF);
+ if (!HeldLoans.contains(ExpiredLoan))
+ continue;
+ // Loan is defaulted.
+ Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind);
+ if (CurConfidence < NewConfidence) {
+ CurConfidence = NewConfidence;
+ BadUse = LiveInfo.CausingUseFact;
+ }
+ }
+ if (!BadUse)
+ return;
+ // We have a use-after-free.
+ Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel;
+ if (LastConf >= CurConfidence)
+ return;
+ FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(),
+ /*UseExpr=*/BadUse->getUseExpr(),
+ /*ConfidenceLevel=*/CurConfidence};
+ }
+
+ static Confidence livenessKindToConfidence(LivenessKind K) {
+ switch (K) {
+ case LivenessKind::Must:
+ return Confidence::Definite;
+ case LivenessKind::Maybe:
+ return Confidence::Maybe;
+ case LivenessKind::Dead:
+ return Confidence::None;
+ }
+ llvm_unreachable("unknown liveness kind");
+ }
+
+ void issuePendingWarnings() {
+ if (!Reporter)
+ return;
+ for (const auto &[LID, Warning] : FinalWarningsMap) {
+ const Loan &L = FactMgr.getLoanMgr().getLoan(LID);
+ const Expr *IssueExpr = L.IssueExpr;
+ Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr,
+ Warning.ExpiryLoc, Warning.ConfidenceLevel);
+ }
+ }
+};
+
+// ========================================================================= //
+// LifetimeSafetyAnalysis Class Implementation
+// ========================================================================= //
+
+/// An object to hold the factories for immutable collections, ensuring
+/// that all created states share the same underlying memory management.
+struct LifetimeFactory {
+ llvm::BumpPtrAllocator Allocator;
+ OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false};
+ LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false};
+ LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false};
+};
+
+// We need this here for unique_ptr with forward declared class.
+LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default;
+
+LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC,
+ LifetimeSafetyReporter *Reporter)
+ : AC(AC), Reporter(Reporter), Factory(std::make_unique<LifetimeFactory>()),
+ FactMgr(std::make_unique<FactManager>()) {}
+
+void LifetimeSafetyAnalysis::run() {
+ llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis");
+
+ const CFG &Cfg = *AC.getCFG();
+ DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
+ /*ShowColors=*/true));
+
+ FactGenerator FactGen(*FactMgr, AC);
+ FactGen.run();
+ DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC));
+
+ /// TODO(opt): Consider optimizing individual blocks before running the
+ /// dataflow analysis.
+ /// 1. Expression Origins: These are assigned once and read at most once,
+ /// forming simple chains. These chains can be compressed into a single
+ /// assignment.
+ /// 2. Block-Local Loans: Origins of expressions are never read by other
+ /// blocks; only Decls are visible. Therefore, loans in a block that
+ /// never reach an Origin associated with a Decl can be safely dropped by
+ /// the analysis.
+ /// 3. Collapse ExpireFacts belonging to same source location into a single
+ /// Fact.
+ LoanPropagation = std::make_unique<LoanPropagationAnalysis>(
+ Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory);
+ LoanPropagation->run();
+
+ LiveOrigins = std::make_unique<LiveOriginAnalysis>(
+ Cfg, AC, *FactMgr, Factory->LivenessMapFactory);
+ LiveOrigins->run();
+ DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this));
+
+ LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC,
+ Reporter);
+ Checker.run();
+}
+
+LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID,
+ ProgramPoint PP) const {
+ assert(LoanPropagation && "Analysis has not been run.");
+ return LoanPropagation->getLoans(OID, PP);
+}
+
+std::optional<OriginID>
+LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const {
+ assert(FactMgr && "FactManager not initialized");
+ // This assumes the OriginManager's `get` can find an existing origin.
+ // We might need a `find` method on OriginManager to avoid `getOrCreate` logic
+ // in a const-query context if that becomes an issue.
+ return FactMgr->getOriginMgr().get(*D);
+}
+
+std::vector<LoanID>
+LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const {
+ assert(FactMgr && "FactManager not initialized");
+ std::vector<LoanID> Result;
+ for (const Loan &L : FactMgr->getLoanMgr().getLoans())
+ if (L.Path.D == VD)
+ Result.push_back(L.ID);
+ return Result;
+}
+
+std::vector<std::pair<OriginID, LivenessKind>>
+LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const {
+ assert(LiveOrigins && "LiveOriginAnalysis has not been run.");
+ std::vector<std::pair<OriginID, LivenessKind>> Result;
+ for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP))
+ Result.push_back({OID, Info.Kind});
+ return Result;
+}
+
+llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const {
+ assert(FactMgr && "FactManager not initialized");
+ llvm::StringMap<ProgramPoint> AnnotationToPointMap;
+ for (const CFGBlock *Block : *AC.getCFG()) {
+ for (const Fact *F : FactMgr->getFacts(Block)) {
+ if (const auto *TPF = F->getAs<TestPointFact>()) {
+ StringRef PointName = TPF->getAnnotation();
+ assert(AnnotationToPointMap.find(PointName) ==
+ AnnotationToPointMap.end() &&
+ "more than one test points with the same name");
+ AnnotationToPointMap[PointName] = F;
+ }
+ }
+ }
+ return AnnotationToPointMap;
+}
+} // namespace internal
+
+void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC,
+ LifetimeSafetyReporter *Reporter) {
+ internal::LifetimeSafetyAnalysis Analysis(AC, Reporter);
+ Analysis.run();
+}
+} // namespace clang::lifetimes