diff options
Diffstat (limited to 'clang/lib')
17 files changed, 1295 insertions, 1556 deletions
diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 5a26f3e..1dbd415 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,8 +21,6 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp - LifetimeAnnotations.cpp - LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -51,3 +49,4 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) +add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp deleted file mode 100644 index 6196ec3..0000000 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ /dev/null @@ -1,1546 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include <cstdint> -#include <memory> -#include <optional> - -namespace clang::lifetimes { -namespace internal { - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. - /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - /// The expression that creates the loan, e.g., &x. - const Expr *IssueExpr; - - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} - - void dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; - OS << Path.D->getNameAsString() << ")"; - } -}; - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast<const clang::ValueDecl *>(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast<const clang::Expr *>(); - } -}; - -/// Manages the creation, storage and retrieval of loans. -class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector<Loan> AllLoans; -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); - } - Origin &addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); - } - - // TODO: Mark this method as const once we remove the call to getOrCreate. - OriginID get(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - // If the expression itself has no specific origin, and it's a reference - // to a declaration, its origin is that of the declaration it refers to. - // For pointer types, where we don't pre-emptively create an origin for the - // DeclRefExpr itself. - if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) - return get(*DRE->getDecl()); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - return getOrCreate(E); - } - - OriginID get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; - } - - OriginID getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; - } - - const Origin &getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; - } - - llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; - } - - void dump(OriginID OID, llvm::raw_ostream &OS) const { - OS << OID << " ("; - Origin O = getOrigin(OID); - if (const ValueDecl *VD = O.getDecl()) - OS << "Decl: " << VD->getNameAsString(); - else if (const Expr *E = O.getExpr()) - OS << "Expr: " << E->getStmtClassName(); - else - OS << "Unknown"; - OS << ")"; - } - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector<Origin> AllOrigins; - llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; - llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; -}; - -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). - Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - /// This can also optionally kill the destination origin before flowing into - /// it. Otherwise, the source's loan set is merged into the destination's - /// loan set. - OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, - /// An origin is used (eg. appears as l-value expression like DeclRefExpr). - Use, - /// A marker for a specific point in the code, for testing. - TestPoint, - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template <typename T> const T *getAs() const { - if (T::classof(this)) - return static_cast<const T *>(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; - } -}; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const override { - OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ", ToOrigin: "; - OM.dump(getOriginID(), OS); - OS << ")\n"; - } -}; - -class ExpireFact : public Fact { - LoanID LID; - SourceLocation ExpiryLoc; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID, SourceLocation ExpiryLoc) - : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} - - LoanID getLoanID() const { return LID; } - SourceLocation getExpiryLoc() const { return ExpiryLoc; } - - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const override { - OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ")\n"; - } -}; - -class OriginFlowFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - // True if the destination origin should be killed (i.e., its current loans - // cleared) before the source origin's loans are flowed into it. - bool KillDest; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::OriginFlow; - } - - OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) - : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), - KillDest(KillDest) {} - - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - bool getKillDest() const { return KillDest; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "OriginFlow (Dest: "; - OM.dump(getDestOriginID(), OS); - OS << ", Src: "; - OM.dump(getSrcOriginID(), OS); - OS << (getKillDest() ? "" : ", Merge"); - OS << ")\n"; - } -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); - OS << ")\n"; - } -}; - -class UseFact : public Fact { - const Expr *UseExpr; - // True if this use is a write operation (e.g., left-hand side of assignment). - // Write operations are exempted from use-after-free checks. - bool IsWritten = false; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. - return const_cast<OriginManager &>(OM).get(*UseExpr); - } - const Expr *getUseExpr() const { return UseExpr; } - void markAsWritten() { IsWritten = true; } - bool isWritten() const { return IsWritten; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); - OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; - } -}; - -/// A dummy-fact used to mark a specific point in the code for testing. -/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. -class TestPointFact : public Fact { - StringRef Annotation; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } - - explicit TestPointFact(StringRef Annotation) - : Fact(Kind::TestPoint), Annotation(Annotation) {} - - StringRef getAnnotation() const { return Annotation; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const override { - OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; - } -}; - -class FactManager { -public: - llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template <typename FactType, typename... Args> - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate<FactType>(); - return new (Mem) FactType(std::forward<Args>(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast<NamedDecl>(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. - for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } - } - llvm::dbgs() << " End of Block\n"; - } - } - - LoanManager &getLoanMgr() { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; - -class FactGenerator : public ConstStmtVisitor<FactGenerator> { - using Base = ConstStmtVisitor<FactGenerator>; - -public: - FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. - for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) - Visit(CS->getStmt()); - else if (std::optional<CFGAutomaticObjDtor> DtorOpt = - Element.getAs<CFGAutomaticObjDtor>()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } - } - - void VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast<VarDecl>(D)) - if (hasOrigin(VD)) - if (const Expr *InitExpr = VD->getInit()) - killAndFlowOrigin(*VD, *InitExpr); - } - - void VisitDeclRefExpr(const DeclRefExpr *DRE) { - handleUse(DRE); - // For non-pointer/non-view types, a reference to the variable's storage - // is a borrow. We create a loan for it. - // For pointer/view types, we stick to the existing model for now and do - // not create an extra origin for the l-value expression itself. - - // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is - // not sufficient to model the different levels of indirection. The current - // single-origin model cannot distinguish between a loan to the variable's - // storage and a loan to what it points to. A multi-origin model would be - // required for this. - if (!isPointerType(DRE->getType())) { - if (const Loan *L = createLoan(DRE)) { - OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); - CurrentBlockFacts.push_back( - FactMgr.createFact<IssueFact>(L->ID, ExprOID)); - } - } - } - - void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { - if (isGslPointerType(CCE->getType())) { - handleGSLPointerConstruction(CCE); - return; - } - } - - void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { - // Specifically for conversion operators, - // like `std::string_view p = std::string{};` - if (isGslPointerType(MCE->getType()) && - isa<CXXConversionDecl>(MCE->getCalleeDecl())) { - // The argument is the implicit object itself. - handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}, - /*IsGslConstruction=*/true); - } - if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { - // Construct the argument list, with the implicit 'this' object as the - // first argument. - llvm::SmallVector<const Expr *, 4> Args; - Args.push_back(MCE->getImplicitObjectArgument()); - Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); - - handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); - } - } - - void VisitCallExpr(const CallExpr *CE) { - handleFunctionCall(CE, CE->getDirectCallee(), - {CE->getArgs(), CE->getNumArgs()}); - } - - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. - FactMgr.getOriginMgr().getOrCreate(*N); - } - - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE)) - return; - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). - killAndFlowOrigin(*ICE, *ICE->getSubExpr()); - } - - void VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - // Taking address of a pointer-type expression is not yet supported and - // will be supported in multi-origin model. - if (isPointerType(SubExpr->getType())) - return; - // The origin of an address-of expression (e.g., &x) is the origin of - // its sub-expression (x). This fact will cause the dataflow analysis - // to propagate any loans held by the sub-expression's origin to the - // origin of this UnaryOperator expression. - killAndFlowOrigin(*UO, *SubExpr); - } - } - - void VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr)) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back( - FactMgr.createFact<ReturnOfOriginFact>(OID)); - } - } - } - - void VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) - handleAssignment(BO->getLHS(), BO->getRHS()); - } - - void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - // Assignment operators have special "kill-then-propagate" semantics - // and are handled separately. - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { - handleAssignment(OCE->getArg(0), OCE->getArg(1)); - return; - } - handleFunctionCall(OCE, OCE->getDirectCallee(), - {OCE->getArgs(), OCE->getNumArgs()}, - /*IsGslConstruction=*/false); - } - - void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { - // Check if this is a test point marker. If so, we are done with this - // expression. - if (handleTestPoint(FCE)) - return; - if (isGslPointerType(FCE->getType())) - killAndFlowOrigin(*FCE, *FCE->getSubExpr()); - } - - void VisitInitListExpr(const InitListExpr *ILE) { - if (!hasOrigin(ILE)) - return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) - killAndFlowOrigin(*ILE, *ILE->getInit(0)); - } - - void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { - if (!hasOrigin(MTE)) - return; - // A temporary object's origin is the same as the origin of the - // expression that initializes it. - killAndFlowOrigin(*MTE, *MTE->getSubExpr()); - } - - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); - } - } - -private: - static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. - if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr<PointerAttr>(); - return RD->hasAttr<PointerAttr>(); - } - return false; - } - - static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType() || isGslPointerType(QT); - } - // Check if a type has an origin. - static bool hasOrigin(const Expr *E) { - return E->isGLValue() || isPointerType(E->getType()); - } - - static bool hasOrigin(const VarDecl *VD) { - return isPointerType(VD->getType()); - } - - void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { - assert(isGslPointerType(CCE->getType())); - if (CCE->getNumArgs() != 1) - return; - if (hasOrigin(CCE->getArg(0))) - killAndFlowOrigin(*CCE, *CCE->getArg(0)); - else - // This could be a new borrow. - handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}, - /*IsGslConstruction=*/true); - } - - /// Checks if a call-like expression creates a borrow by passing a value to a - /// reference parameter, creating an IssueFact if it does. - /// \param IsGslConstruction True if this is a GSL construction where all - /// argument origins should flow to the returned origin. - void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef<const Expr *> Args, - bool IsGslConstruction = false) { - // Ignore functions returning values with no origin. - if (!FD || !hasOrigin(Call)) - return; - auto IsArgLifetimeBound = [FD](unsigned I) -> bool { - const ParmVarDecl *PVD = nullptr; - if (const auto *Method = dyn_cast<CXXMethodDecl>(FD); - Method && Method->isInstance()) { - if (I == 0) - // For the 'this' argument, the attribute is on the method itself. - return implicitObjectParamIsLifetimeBound(Method); - if ((I - 1) < Method->getNumParams()) - // For explicit arguments, find the corresponding parameter - // declaration. - PVD = Method->getParamDecl(I - 1); - } else if (I < FD->getNumParams()) - // For free functions or static methods. - PVD = FD->getParamDecl(I); - return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false; - }; - if (Args.empty()) - return; - bool killedSrc = false; - for (unsigned I = 0; I < Args.size(); ++I) - if (IsGslConstruction || IsArgLifetimeBound(I)) { - if (!killedSrc) { - killedSrc = true; - killAndFlowOrigin(*Call, *Args[I]); - } else - flowOrigin(*Call, *Args[I]); - } - } - - /// Creates a loan for the storage path of a given declaration reference. - /// This function should be called whenever a DeclRefExpr represents a borrow. - /// \param DRE The declaration reference expression that initiates the borrow. - /// \return The new Loan on success, nullptr otherwise. - const Loan *createLoan(const DeclRefExpr *DRE) { - if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { - AccessPath Path(VD); - // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); - } - return nullptr; - } - - template <typename Destination, typename Source> - void flowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>( - DestOID, SrcOID, /*KillDest=*/false)); - } - - template <typename Destination, typename Source> - void killAndFlowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact<OriginFlowFact>(DestOID, SrcOID, /*KillDest=*/true)); - } - - /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. - /// If so, creates a `TestPointFact` and returns true. - bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { - if (!FCE->getType()->isVoidType()) - return false; - - const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { - llvm::StringRef LiteralValue = SL->getString(); - const std::string Prefix = "__lifetime_test_point_"; - - if (LiteralValue.starts_with(Prefix)) { - StringRef Annotation = LiteralValue.drop_front(Prefix.length()); - CurrentBlockFacts.push_back( - FactMgr.createFact<TestPointFact>(Annotation)); - return true; - } - } - return false; - } - - void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { - if (!hasOrigin(LHSExpr)) - return; - // Find the underlying variable declaration for the left-hand side. - if (const auto *DRE_LHS = - dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { - markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) { - // Kill the old loans of the destination origin and flow the new loans - // from the source origin. - killAndFlowOrigin(*VD_LHS, *RHSExpr); - } - } - } - - // A DeclRefExpr will be treated as a use of the referenced decl. It will be - // checked for use-after-free unless it is later marked as being written to - // (e.g. on the left-hand side of an assignment). - void handleUse(const DeclRefExpr *DRE) { - if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact<UseFact>(DRE); - CurrentBlockFacts.push_back(UF); - assert(!UseFacts.contains(DRE)); - UseFacts[DRE] = UF; - } - } - - void markUseAsWrite(const DeclRefExpr *DRE) { - if (!isPointerType(DRE->getType())) - return; - assert(UseFacts.contains(DRE)); - UseFacts[DRE]->markAsWritten(); - } - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector<Fact *> CurrentBlockFacts; - // To distinguish between reads and writes for use-after-free checks, this map - // stores the `UseFact` for each `DeclRefExpr`. We initially identify all - // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use - // corresponding to the left-hand side is updated to be a "write", thereby - // exempting it from the check. - llvm::DenseMap<const DeclRefExpr *, UseFact *> UseFacts; -}; - -// ========================================================================= // -// Generic Dataflow Analysis -// ========================================================================= // - -enum class Direction { Forward, Backward }; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// A generic, policy-based driver for dataflow analyses. It combines -/// the dataflow runner and the transferer logic into a single class hierarchy. -/// -/// The derived class is expected to provide: -/// - A `Lattice` type. -/// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state of the analysis. -/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. -/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single -/// lifetime-relevant `Fact` transforms the lattice state. Only overloads -/// for facts relevant to the analysis need to be implemented. -/// -/// \tparam Derived The CRTP derived class that implements the specific -/// analysis. -/// \tparam LatticeType The dataflow lattice used by the analysis. -/// \tparam Dir The direction of the analysis (Forward or Backward). -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -template <typename Derived, typename LatticeType, Direction Dir> -class DataflowAnalysis { -public: - using Lattice = LatticeType; - using Base = DataflowAnalysis<Derived, Lattice, Dir>; - -private: - const CFG &Cfg; - AnalysisDeclContext &AC; - - /// The dataflow state before a basic block is processed. - llvm::DenseMap<const CFGBlock *, Lattice> InStates; - /// The dataflow state after a basic block is processed. - llvm::DenseMap<const CFGBlock *, Lattice> OutStates; - /// The dataflow state at a Program Point. - /// In a forward analysis, this is the state after the Fact at that point has - /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; - - static constexpr bool isForward() { return Dir == Direction::Forward; } - -protected: - FactManager &AllFacts; - - explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, - FactManager &F) - : Cfg(C), AC(AC), AllFacts(F) {} - -public: - void run() { - Derived &D = static_cast<Derived &>(*this); - llvm::TimeTraceScope Time(D.getAnalysisName()); - - using Worklist = - std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist, - BackwardDataflowWorklist>; - Worklist W(Cfg, AC); - - const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); - InStates[Start] = D.getInitialState(); - W.enqueueBlock(Start); - - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = *getInState(B); - Lattice StateOut = transferBlock(B, StateIn); - OutStates[B] = StateOut; - for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { - if (!AdjacentB) - continue; - std::optional<Lattice> OldInState = getInState(AdjacentB); - Lattice NewInState = - !OldInState ? StateOut : D.join(*OldInState, StateOut); - // Enqueue the adjacent block if its in-state has changed or if we have - // never seen it. - if (!OldInState || NewInState != *OldInState) { - InStates[AdjacentB] = NewInState; - W.enqueueBlock(AdjacentB); - } - } - } - } - -protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - - std::optional<Lattice> getInState(const CFGBlock *B) const { - auto It = InStates.find(B); - if (It == InStates.end()) - return std::nullopt; - return It->second; - } - - Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } - - void dump() const { - const Derived *D = static_cast<const Derived *>(this); - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << D->getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); - getOutState(&B).dump(llvm::dbgs()); - } - -private: - /// Computes the state at one end of a block by applying all its facts - /// sequentially to a given state from the other end. - Lattice transferBlock(const CFGBlock *Block, Lattice State) { - auto Facts = AllFacts.getFacts(Block); - if constexpr (isForward()) { - for (const Fact *F : Facts) { - State = transferFact(State, F); - PerPointStates[F] = State; - } - } else { - for (const Fact *F : llvm::reverse(Facts)) { - // In backward analysis, capture the state before applying the fact. - PerPointStates[F] = State; - State = transferFact(State, F); - } - } - return State; - } - - Lattice transferFact(Lattice In, const Fact *F) { - assert(F); - Derived *D = static_cast<Derived *>(this); - switch (F->getKind()) { - case Fact::Kind::Issue: - return D->transfer(In, *F->getAs<IssueFact>()); - case Fact::Kind::Expire: - return D->transfer(In, *F->getAs<ExpireFact>()); - case Fact::Kind::OriginFlow: - return D->transfer(In, *F->getAs<OriginFlowFact>()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); - case Fact::Kind::Use: - return D->transfer(In, *F->getAs<UseFact>()); - case Fact::Kind::TestPoint: - return D->transfer(In, *F->getAs<TestPointFact>()); - } - llvm_unreachable("Unknown fact kind"); - } - -public: - Lattice transfer(Lattice In, const IssueFact &) { return In; } - Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } - Lattice transfer(Lattice In, const UseFact &) { return In; } - Lattice transfer(Lattice In, const TestPointFact &) { return In; } -}; - -namespace utils { - -/// Computes the union of two ImmutableSets. -template <typename T> -static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, - llvm::ImmutableSet<T> B, - typename llvm::ImmutableSet<T>::Factory &F) { - if (A.getHeight() < B.getHeight()) - std::swap(A, B); - for (const T &E : B) - A = F.add(A, E); - return A; -} - -/// Describes the strategy for joining two `ImmutableMap` instances, primarily -/// differing in how they handle keys that are unique to one of the maps. -/// -/// A `Symmetric` join is universally correct, while an `Asymmetric` join -/// serves as a performance optimization. The latter is applicable only when the -/// join operation possesses a left identity element, allowing for a more -/// efficient, one-sided merge. -enum class JoinKind { - /// A symmetric join applies the `JoinValues` operation to keys unique to - /// either map, ensuring that values from both maps contribute to the result. - Symmetric, - /// An asymmetric join preserves keys unique to the first map as-is, while - /// applying the `JoinValues` operation only to keys unique to the second map. - Asymmetric, -}; - -/// Computes the key-wise union of two ImmutableMaps. -// TODO(opt): This key-wise join is a performance bottleneck. A more -// efficient merge could be implemented using a Patricia Trie or HAMT -// instead of the current AVL-tree-based ImmutableMap. -template <typename K, typename V, typename Joiner> -static llvm::ImmutableMap<K, V> -join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B, - typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues, - JoinKind Kind) { - if (A.getHeight() < B.getHeight()) - return join(B, A, F, JoinValues, Kind); - - // For each element in B, join it with the corresponding element in A - // (or with an empty value if it doesn't exist in A). - llvm::ImmutableMap<K, V> Res = A; - for (const auto &Entry : B) { - const K &Key = Entry.first; - const V &ValB = Entry.second; - Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); - } - if (Kind == JoinKind::Symmetric) { - for (const auto &Entry : A) { - const K &Key = Entry.first; - const V &ValA = Entry.second; - if (!B.contains(Key)) - Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); - } - } - return Res; -} -} // namespace utils - -// ========================================================================= // -// Loan Propagation Analysis -// ========================================================================= // - -/// Represents the dataflow lattice for loan propagation. -/// -/// This lattice tracks which loans each origin may hold at a given program -/// point.The lattice has a finite height: An origin's loan set is bounded by -/// the total number of loans in the function. -/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. -struct LoanPropagationLattice { - /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} - LoanPropagationLattice() = default; - - bool operator==(const LoanPropagationLattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const LoanPropagationLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS) const { - OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) - OS << " <empty>\n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -/// The analysis that tracks which loans belong to which origins. -class LoanPropagationAnalysis - : public DataflowAnalysis<LoanPropagationAnalysis, LoanPropagationLattice, - Direction::Forward> { - OriginLoanMap::Factory &OriginLoanMapFactory; - LoanSet::Factory &LoanSetFactory; - -public: - LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} - - using Base::transfer; - - StringRef getAnalysisName() const { return "LoanPropagation"; } - - Lattice getInitialState() { return Lattice{}; } - - /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. - Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, - [&](const LoanSet *S1, const LoanSet *S2) { - assert((S1 || S2) && "unexpectedly merging 2 empty sets"); - if (!S1) - return *S2; - if (!S2) - return *S1; - return utils::join(*S1, *S2, LoanSetFactory); - }, - // Asymmetric join is a performance win. For origins present only on one - // branch, the loan set can be carried over as-is. - utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); - } - - /// A new loan is issued to the origin. Old loans are erased. - Lattice transfer(Lattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return LoanPropagationLattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); - } - - /// A flow from source to destination. If `KillDest` is true, this replaces - /// the destination's loans with the source's. Otherwise, the source's loans - /// are merged into the destination's. - Lattice transfer(Lattice In, const OriginFlowFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - - LoanSet DestLoans = - F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); - LoanSet SrcLoans = getLoans(In, SrcOID); - LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - - return LoanPropagationLattice( - OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); - } - - LoanSet getLoans(OriginID OID, ProgramPoint P) const { - return getLoans(getState(P), OID); - } - -private: - LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) - return *Loans; - return LoanSetFactory.getEmptySet(); - } -}; - -// ========================================================================= // -// Live Origins Analysis -// ========================================================================= // -// -// A backward dataflow analysis that determines which origins are "live" at each -// program point. An origin is "live" at a program point if there's a potential -// future use of the pointer it represents. Liveness is "generated" by a read of -// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being -// live) when its loan set is overwritten (e.g. a OriginFlow killing the -// destination origin). -// -// This information is used for detecting use-after-free errors, as it allows us -// to check if a live origin holds a loan to an object that has already expired. -// ========================================================================= // - -/// Information about why an origin is live at a program point. -struct LivenessInfo { - /// The use that makes the origin live. If liveness is propagated from - /// multiple uses along different paths, this will point to the use appearing - /// earlier in the translation unit. - /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; - /// The kind of liveness of the origin. - /// `Must`: The origin is live on all control-flow paths from the current - /// point to the function's exit (i.e. the current point is dominated by a set - /// of uses). - /// `Maybe`: indicates it is live on some but not all paths. - /// - /// This determines the diagnostic's confidence level. - /// `Must`-be-alive at expiration implies a definite use-after-free, - /// while `Maybe`-be-alive suggests a potential one on some paths. - LivenessKind Kind; - - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} - - bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; - } - bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); - IDBuilder.Add(Kind); - } -}; - -using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>; - -/// The dataflow lattice for origin liveness analysis. -/// It tracks which origins are live, why they're live (which UseFact), -/// and the confidence level of that liveness. -struct LivenessLattice { - LivenessMap LiveOrigins; - - LivenessLattice() : LiveOrigins(nullptr) {}; - - explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} - - bool operator==(const LivenessLattice &Other) const { - return LiveOrigins == Other.LiveOrigins; - } - - bool operator!=(const LivenessLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { - if (LiveOrigins.isEmpty()) - OS << " <empty>\n"; - for (const auto &Entry : LiveOrigins) { - OriginID OID = Entry.first; - const LivenessInfo &Info = Entry.second; - OS << " "; - OM.dump(OID, OS); - OS << " is "; - switch (Info.Kind) { - case LivenessKind::Must: - OS << "definitely"; - break; - case LivenessKind::Maybe: - OS << "maybe"; - break; - case LivenessKind::Dead: - llvm_unreachable("liveness kind of live origins should not be dead."); - } - OS << " live at this point\n"; - } - } -}; - -/// The analysis that tracks which origins are live, with granular information -/// about the causing use fact and confidence level. This is a backward -/// analysis. -class LiveOriginAnalysis - : public DataflowAnalysis<LiveOriginAnalysis, LivenessLattice, - Direction::Backward> { - FactManager &FactMgr; - LivenessMap::Factory &Factory; - -public: - LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF) - : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} - using DataflowAnalysis<LiveOriginAnalysis, Lattice, - Direction::Backward>::transfer; - - StringRef getAnalysisName() const { return "LiveOrigins"; } - - Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - - /// Merges two lattices by combining liveness information. - /// When the same origin has different confidence levels, we take the lower - /// one. - Lattice join(Lattice L1, Lattice L2) const { - LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. - auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; - }; - auto CombineLivenessKind = [](LivenessKind K1, - LivenessKind K2) -> LivenessKind { - assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); - assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); - // Only return "Must" if both paths are "Must", otherwise Maybe. - if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) - return LivenessKind::Must; - return LivenessKind::Maybe; - }; - auto CombineLivenessInfo = [&](const LivenessInfo *L1, - const LivenessInfo *L2) -> LivenessInfo { - assert((L1 || L2) && "unexpectedly merging 2 empty sets"); - if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); - if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); - }; - return Lattice(utils::join( - L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, - // A symmetric join is required here. If an origin is live on one - // branch but not the other, its confidence must be demoted to `Maybe`. - utils::JoinKind::Symmetric)); - } - - /// A read operation makes the origin live with definite confidence, as it - /// dominates this program point. A write operation kills the liveness of - /// the origin since it overwrites the value. - Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); - // Write kills liveness. - if (UF.isWritten()) - return Lattice(Factory.remove(In.LiveOrigins, OID)); - // Read makes origin live with definite confidence (dominates this point). - return Lattice(Factory.add(In.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } - - /// Issuing a new loan to an origin kills its liveness. - Lattice transfer(Lattice In, const IssueFact &IF) { - return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); - } - - /// An OriginFlow kills the liveness of the destination origin if `KillDest` - /// is true. Otherwise, it propagates liveness from destination to source. - Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); - } - - LivenessMap getLiveOrigins(ProgramPoint P) const { - return getState(P).LiveOrigins; - } - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - for (const auto &Entry : LSA.getTestPoints()) { - OS << "TestPoint: " << Entry.getKey() << "\n"; - getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); - } - } -}; - -// ========================================================================= // -// Lifetime checker and Error reporter -// ========================================================================= // - -/// Struct to store the complete context for a potential lifetime violation. -struct PendingWarning { - SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. - Confidence ConfidenceLevel; -}; - -class LifetimeChecker { -private: - llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap; - LoanPropagationAnalysis &LoanPropagation; - LiveOriginAnalysis &LiveOrigins; - FactManager &FactMgr; - AnalysisDeclContext &ADC; - LifetimeSafetyReporter *Reporter; - -public: - LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, - FactManager &FM, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter) - : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), - Reporter(Reporter) {} - - void run() { - llvm::TimeTraceScope TimeProfile("LifetimeChecker"); - for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>()) - for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *EF = F->getAs<ExpireFact>()) - checkExpiry(EF); - issuePendingWarnings(); - } - - /// Checks for use-after-free errors when a loan expires. - /// - /// This method examines all live origins at the expiry point and determines - /// if any of them hold the expiring loan. If so, it creates a pending - /// warning with the appropriate confidence level based on the liveness - /// information. The confidence reflects whether the origin is definitely - /// or maybe live at this point. - /// - /// Note: This implementation considers only the confidence of origin - /// liveness. Future enhancements could also consider the confidence of loan - /// propagation (e.g., a loan may only be held on some execution paths). - void checkExpiry(const ExpireFact *EF) { - LoanID ExpiredLoan = EF->getLoanID(); - LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); - Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; - for (auto &[OID, LiveInfo] : Origins) { - LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); - if (!HeldLoans.contains(ExpiredLoan)) - continue; - // Loan is defaulted. - Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); - if (CurConfidence < NewConfidence) { - CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; - } - } - if (!BadUse) - return; - // We have a use-after-free. - Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; - if (LastConf >= CurConfidence) - return; - FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), - /*ConfidenceLevel=*/CurConfidence}; - } - - static Confidence livenessKindToConfidence(LivenessKind K) { - switch (K) { - case LivenessKind::Must: - return Confidence::Definite; - case LivenessKind::Maybe: - return Confidence::Maybe; - case LivenessKind::Dead: - return Confidence::None; - } - llvm_unreachable("unknown liveness kind"); - } - - void issuePendingWarnings() { - if (!Reporter) - return; - for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); - } - } -}; - -// ========================================================================= // -// LifetimeSafetyAnalysis Class Implementation -// ========================================================================= // - -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - llvm::BumpPtrAllocator Allocator; - OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; - LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; -}; - -// We need this here for unique_ptr with forward declared class. -LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; - -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) - : AC(AC), Reporter(Reporter), Factory(std::make_unique<LifetimeFactory>()), - FactMgr(std::make_unique<FactManager>()) {} - -void LifetimeSafetyAnalysis::run() { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - - const CFG &Cfg = *AC.getCFG(); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - - FactGenerator FactGen(*FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - /// 3. Collapse ExpireFacts belonging to same source location into a single - /// Fact. - LoanPropagation = std::make_unique<LoanPropagationAnalysis>( - Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); - LoanPropagation->run(); - - LiveOrigins = std::make_unique<LiveOriginAnalysis>( - Cfg, AC, *FactMgr, Factory->LivenessMapFactory); - LiveOrigins->run(); - DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); - - LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, - Reporter); - Checker.run(); -} - -LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, - ProgramPoint PP) const { - assert(LoanPropagation && "Analysis has not been run."); - return LoanPropagation->getLoans(OID, PP); -} - -std::optional<OriginID> -LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { - assert(FactMgr && "FactManager not initialized"); - // This assumes the OriginManager's `get` can find an existing origin. - // We might need a `find` method on OriginManager to avoid `getOrCreate` logic - // in a const-query context if that becomes an issue. - return FactMgr->getOriginMgr().get(*D); -} - -std::vector<LoanID> -LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { - assert(FactMgr && "FactManager not initialized"); - std::vector<LoanID> Result; - for (const Loan &L : FactMgr->getLoanMgr().getLoans()) - if (L.Path.D == VD) - Result.push_back(L.ID); - return Result; -} - -std::vector<std::pair<OriginID, LivenessKind>> -LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { - assert(LiveOrigins && "LiveOriginAnalysis has not been run."); - std::vector<std::pair<OriginID, LivenessKind>> Result; - for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) - Result.push_back({OID, Info.Kind}); - return Result; -} - -llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const { - assert(FactMgr && "FactManager not initialized"); - llvm::StringMap<ProgramPoint> AnnotationToPointMap; - for (const CFGBlock *Block : *AC.getCFG()) { - for (const Fact *F : FactMgr->getFacts(Block)) { - if (const auto *TPF = F->getAs<TestPointFact>()) { - StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && - "more than one test points with the same name"); - AnnotationToPointMap[PointName] = F; - } - } - } - return AnnotationToPointMap; -} -} // namespace internal - -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); -} -} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0000000..5874e84 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,17 @@ +add_clang_library(clangAnalysisLifetimeSafety + Checker.cpp + Facts.cpp + FactsGenerator.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp + LiveOrigins.cpp + Loans.cpp + LoanPropagation.cpp + Origins.cpp + + LINK_LIBS + clangAST + clangAnalysis + clangBasic + ) + diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp new file mode 100644 index 0000000..c443c3a --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -0,0 +1,130 @@ +//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LifetimeChecker, which detects use-after-free +// errors by checking if live origins hold loans that have expired. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); +} + +namespace { + +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap; + const LoanPropagationAnalysis &LoanPropagation; + const LiveOriginsAnalysis &LiveOrigins; + const FactManager &FactMgr; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, + AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) + : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), + Reporter(Reporter) { + for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs<ExpireFact>()) + checkExpiry(EF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } + } + if (!BadUse) + return; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; +} // namespace + +void runLifetimeChecker(const LoanPropagationAnalysis &LP, + const LiveOriginsAnalysis &LO, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h new file mode 100644 index 0000000..2f7bcb6 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -0,0 +1,188 @@ +//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic, policy-based driver for dataflow analyses. +// It provides a flexible framework that combines the dataflow runner and +// transfer functions, allowing derived classes to implement specific analyses +// by defining their lattice, join, and transfer functions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include <optional> + +namespace clang::lifetimes::internal { + +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template <typename Derived, typename LatticeType, Direction Dir> +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis<Derived, Lattice, Dir>; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap<const CFGBlock *, Lattice> InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap<const CFGBlock *, Lattice> OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap<ProgramPoint, Lattice> PerPointStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &FactMgr; + + explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, + FactManager &FactMgr) + : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} + +public: + void run() { + Derived &D = static_cast<Derived &>(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + using Worklist = + std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist, + BackwardDataflowWorklist>; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional<Lattice> OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + + std::optional<Lattice> getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast<const Derived *>(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = FactMgr.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. + PerPointStates[F] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast<Derived *>(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs<IssueFact>()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs<ExpireFact>()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs<OriginFlowFact>()); + case Fact::Kind::ReturnOfOrigin: + return D->transfer(In, *F->getAs<ReturnOfOriginFact>()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs<UseFact>()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs<TestPointFact>()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; +} // namespace clang::lifetimes::internal +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp new file mode 100644 index 0000000..1aea64f --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -0,0 +1,102 @@ +//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" + +namespace clang::lifetimes::internal { + +void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; +} + +void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; +} + +void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; +} + +void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; +} + +void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; +} + +void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "Use ("; + OM.dump(getUsedOrigin(OM), OS); + OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; +} + +void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; +} + +llvm::StringMap<ProgramPoint> FactManager::getTestPoints() const { + llvm::StringMap<ProgramPoint> AnnotationToPointMap; + for (const CFGBlock *Block : BlockToFactsMap.keys()) { + for (const Fact *F : getFacts(Block)) { + if (const auto *TPF = F->getAs<TestPointFact>()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} + +void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast<NamedDecl>(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. + for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + } + llvm::dbgs() << " End of Block\n"; + } +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp new file mode 100644 index 0000000..485308f --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -0,0 +1,348 @@ +//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr<PointerAttr>(); + return RD->hasAttr<PointerAttr>(); + } + return false; +} + +static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); +} +// Check if a type has an origin. +static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); +} + +static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); +} + +/// Creates a loan for the storage path of a given declaration reference. +/// This function should be called whenever a DeclRefExpr represents a borrow. +/// \param DRE The declaration reference expression that initiates the borrow. +/// \return The new Loan on success, nullptr otherwise. +static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; +} + +void FactsGenerator::run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) + Visit(CS->getStmt()); + else if (std::optional<CFGAutomaticObjDtor> DtorOpt = + Element.getAs<CFGAutomaticObjDtor>()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } +} + +void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast<VarDecl>(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); +} + +void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(FactMgr, DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact<IssueFact>(L->ID, ExprOID)); + } + } +} + +void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } +} + +void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa<CXXConversionDecl>(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector<const Expr *, 4> Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } +} + +void FactsGenerator::VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); +} + +void FactsGenerator::VisitCXXNullPtrLiteralExpr( + const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); +} + +void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); +} + +void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + killAndFlowOrigin(*UO, *SubExpr); + } +} + +void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back(FactMgr.createFact<ReturnOfOriginFact>(OID)); + } + } +} + +void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); +} + +void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); +} + +void FactsGenerator::VisitCXXFunctionalCastExpr( + const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); +} + +void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); +} + +void FactsGenerator::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); +} + +void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + } +} + +void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); +} + +/// Checks if a call-like expression creates a borrow by passing a value to a +/// reference parameter, creating an IssueFact if it does. +/// \param IsGslConstruction True if this is a GSL construction where all +/// argument origins should flow to the returned origin. +void FactsGenerator::handleFunctionCall(const Expr *Call, + const FunctionDecl *FD, + ArrayRef<const Expr *> Args, + bool IsGslConstruction) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast<CXXMethodDecl>(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr<clang::LifetimeBoundAttr>() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } +} + +/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. +/// If so, creates a `TestPointFact` and returns true. +bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact<TestPointFact>(Annotation)); + return true; + } + } + return false; +} + +void FactsGenerator::handleAssignment(const Expr *LHSExpr, + const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } +} + +// A DeclRefExpr will be treated as a use of the referenced decl. It will be +// checked for use-after-free unless it is later marked as being written to +// (e.g. on the left-hand side of an assignment). +void FactsGenerator::handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact<UseFact>(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } +} + +void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp index e791224..ad61a42 100644 --- a/clang/lib/Analysis/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -13,8 +13,7 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" -namespace clang { -namespace lifetimes { +namespace clang::lifetimes { const FunctionDecl * getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { @@ -71,5 +70,4 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return isNormalAssignmentOperator(FD); } -} // namespace lifetimes -} // namespace clang +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp new file mode 100644 index 0000000..00c7ed90 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -0,0 +1,77 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the main LifetimeSafetyAnalysis class, which coordinates +// the various components (fact generation, loan propagation, live origins +// analysis, and checking) to detect lifetime safety violations in C++ code. +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include <memory> + +namespace clang::lifetimes { +namespace internal { + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter) {} + +void LifetimeSafetyAnalysis::run() { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + + FactsGenerator FactGen(FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. + LoanPropagation = std::make_unique<LoanPropagationAnalysis>( + Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory); + + LiveOrigins = std::make_unique<LiveOriginsAnalysis>( + Cfg, AC, FactMgr, Factory.LivenessMapFactory); + DEBUG_WITH_TYPE("LiveOrigins", + LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints())); + + runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter); +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); + Analysis.run(); +} +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp new file mode 100644 index 0000000..cddb3f3c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -0,0 +1,180 @@ +//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "Dataflow.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang::lifetimes::internal { +namespace { + +/// The dataflow lattice for origin liveness analysis. +/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct Lattice { + LivenessMap LiveOrigins; + + Lattice() : LiveOrigins(nullptr) {}; + + explicit Lattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const Lattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } + } +}; + +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. +class AnalysisImpl + : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward> { + +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward>::transfer; + + StringRef getAnalysisName() const { return "LiveOrigins"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } + + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); + } + + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise, it propagates liveness from destination to source. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOriginsAt(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap<ProgramPoint> TestPoints) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : TestPoints) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } + +private: + FactManager &FactMgr; + LivenessMap::Factory &Factory; +}; +} // namespace + +// PImpl wrapper implementation +class LiveOriginsAnalysis::Impl : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F, + LivenessMap::Factory &SF) + : PImpl(std::make_unique<Impl>(C, AC, F, SF)) { + PImpl->run(); +} + +LiveOriginsAnalysis::~LiveOriginsAnalysis() = default; + +LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const { + return PImpl->getLiveOriginsAt(P); +} + +void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS, + llvm::StringMap<ProgramPoint> TestPoints) const { + PImpl->dump(OS, TestPoints); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp new file mode 100644 index 0000000..387097e --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp @@ -0,0 +1,138 @@ +//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "Dataflow.h" +#include <memory> + +namespace clang::lifetimes::internal { +namespace { +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct Lattice { + /// The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit Lattice(const OriginLoanMap &S) : Origins(S) {} + Lattice() = default; + + bool operator==(const Lattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS) const { + OS << "LoanPropagationLattice State:\n"; + if (Origins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +class AnalysisImpl + : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Forward> { +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "LoanPropagation"; } + + Lattice getInitialState() { return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); + return Lattice(JoinedOrigins); + } + + /// A new loan is issued to the origin. Old loans are erased. + Lattice transfer(Lattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return Lattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + } + + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); + LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + + return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + } + + LoanSet getLoans(OriginID OID, ProgramPoint P) const { + return getLoans(getState(P), OID); + } + +private: + LoanSet getLoans(Lattice L, OriginID OID) const { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return LoanSetFactory.getEmptySet(); + } + + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; +}; +} // namespace + +class LoanPropagationAnalysis::Impl final : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LoanPropagationAnalysis::LoanPropagationAnalysis( + const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : PImpl(std::make_unique<Impl>(C, AC, F, OriginLoanMapFactory, + LoanSetFactory)) { + PImpl->run(); +} + +LoanPropagationAnalysis::~LoanPropagationAnalysis() = default; + +LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const { + return PImpl->getLoans(OID, P); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp new file mode 100644 index 0000000..2c85a3c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Loans.cpp @@ -0,0 +1,18 @@ +//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" + +namespace clang::lifetimes::internal { + +void Loan::dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp new file mode 100644 index 0000000..ea51a75 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -0,0 +1,89 @@ +//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" + +namespace clang::lifetimes::internal { + +void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); +} + +// TODO: Mark this method as const once we remove the call to getOrCreate. +OriginID OriginManager::get(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); +} + +OriginID OriginManager::get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; +} + +OriginID OriginManager::getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; +} + +const Origin &OriginManager::getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; +} + +OriginID OriginManager::getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index e9ca8ce..9abaf79 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,7 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 51e0ee1..0ebf56e 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -111,6 +111,7 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e8a7ad3..8aebf53 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,7 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 35cdfbf..0d8d0fa 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -17,7 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" |