diff options
| author | Ziqing Luo <ziqing_luo@apple.com> | 2026-04-24 15:44:52 -0700 |
|---|---|---|
| committer | Ziqing Luo <ziqing_luo@apple.com> | 2026-04-24 15:44:52 -0700 |
| commit | cb5bfb159639bf46d92c29a1988624e028e5a6e9 (patch) | |
| tree | 911f2c2cceaf50ed9316909d17a5c272c80a02c0 | |
| parent | f6463b7c439f205900db97779bd4464128a3f636 (diff) | |
| download | llvm-origin/users/ziqingluo/PR-172429193-3.tar.gz llvm-origin/users/ziqingluo/PR-172429193-3.tar.bz2 llvm-origin/users/ziqingluo/PR-172429193-3.zip | |
address comments (origin/users/ziqingluo/PR-172429193-3)
3 files changed, 228 insertions, 213 deletions
diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp index 29af93044e81..273687cd4533 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -33,220 +33,28 @@ namespace { using namespace clang; using namespace ssaf; -/// Match and extract assignments. -/// The extraction function 'XF' can be described by the following rules: -/// -/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))" -/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ... -/// XF(return e;) := XF(Fun = e), where 'Fun' is the enclosing -/// function -/// XF(ctor(a, ...) : x1(y1), ... {...}) -/// := XF(Param_1 = a), ..., -/// XF(x1 = y1), ..., -/// ctor's body will be visited separately. -/// XF(T var = e) := XF(Var = e) -/// XF(T var = init-list) := see \ref matchInitializerList -class PointerAssignmentMatcher { - - /// Match initializer lists of the form 'Var = {a, b, c, ...}': - /// - /// If 'Var' is a struct/union: - /// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a) - /// XF(Var.field_2 = b) - /// ... - /// If 'Var' is an array: - /// XF(Var = {a, b, c, ...}) := XF(*Var = a) - /// XF(*Var = b) - /// ... - /// - /// The process is recursive: 'a', 'b', 'c', ... may themselves be - /// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep - /// track of the pointer level the left-hand side. 
- llvm::Error matchInitializerList(const ValueDecl *Base, const Expr *InitExpr, - unsigned ArrayElementIndirectLevel = 0) { - const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr); - - if (!ILE) { - if (!hasPtrOrArrType(InitExpr)) - return llvm::Error::success(); - - auto BaseEPL = toEPL(Base); - - if (!BaseEPL) - return BaseEPL.takeError(); - - // Apply ArrayElementIndirectLevel to BaseEPL - auto R = llvm::map_range(*BaseEPL, [&ArrayElementIndirectLevel]( - const EntityPointerLevel &EPL) { - EntityPointerLevel Result = EPL; - for (unsigned I = 0; I < ArrayElementIndirectLevel; ++I) - Result = incrementPointerLevel(Result); - return Result; - }); - return addEdges(EntityPointerLevelSet{R.begin(), R.end()}, - toEPL(InitExpr)); - } - // Note that `Base`'s type is NOT the real LHS type when - // ArrayElementIndirectLevel > 0: - QualType Type = InitExpr->getType(); - - if (auto *RD = Type->getAsRecordDecl()) - return matchInitializerListForRecordDecl(RD, ILE); - if (Type->isArrayType()) - return matchInitializerListForArray(Base, ILE, ArrayElementIndirectLevel); - // Must be the case of using a initializer-list for a scalar: - return matchInitializerList(Base, ILE->getInit(0)); - } - - // Helper function for matchInitializerList that handles record: - llvm::Error matchInitializerListForRecordDecl(const RecordDecl *RecordTy, - const InitListExpr *ILE) { - if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RecordTy)) - if (CXXRD->getNumBases() != 0) { - // FIXME: support this: - return makeErrAtNode( - Ctx, ILE, - "attempt to create pointer assignment edges between " - "CXXRecordDecls with base classes and initializer-lists"); - } - // Handle union: - if (RecordTy->isUnion()) { - auto *InitField = ILE->getInitializedFieldInUnion(); - - if (!InitField) - return llvm::Error::success(); - assert(!ILE->inits().empty()); - return matchInitializerList(InitField, ILE->getInit(0)); - } - // Handle struct/class: - ILE = ILE->isSemanticForm() ? 
ILE : ILE->getSemanticForm(); - - auto FieldIter = RecordTy->field_begin(); - - assert(RecordTy->getNumFields() >= ILE->getNumInits()); - for (auto *Init : ILE->inits()) - if (auto Err = matchInitializerList(*(FieldIter++), Init)) - return Err; - return llvm::Error::success(); - } - - // Helper function for matchInitializerList that handles array: - llvm::Error matchInitializerListForArray(const ValueDecl *Array, - const InitListExpr *ILE, - unsigned ArrayIndirectLevel = 0) { - for (auto *E : ILE->inits()) - if (auto Err = matchInitializerList(Array, E, ArrayIndirectLevel + 1)) - return Err; - return llvm::Error::success(); - } - +class PointerFlowMatcher { public: EdgeSet Results; + ASTContext &Ctx; - PointerAssignmentMatcher( + PointerFlowMatcher( ASTContext &Ctx, std::function<EntityId(const EntityName &)> AddEntity) : Ctx(Ctx), AddEntity(AddEntity) {} - llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl) { - if (const Stmt *S = DynNode.get<Stmt>()) { - // Match 'p = q' whenever it has pointer or array type: - if (const auto *BO = dyn_cast<BinaryOperator>(S); - BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(BO)) { - return addEdges(toEPL(BO->getLHS()), toEPL(BO->getRHS())); - } - - // Match arg-to-param passing (in CallExpr) for any pointer type argument: - if (const auto *CE = dyn_cast<CallExpr>(S)) { - const FunctionDecl *FD = CE->getDirectCallee(); - - if (!FD) - return llvm::Error::success(); - - unsigned ArgIdx = 0; - - if (isa<CXXOperatorCallExpr>(CE)) - if (auto *MD = dyn_cast<CXXMethodDecl>(FD); - MD && !MD->isExplicitObjectMemberFunction()) - ArgIdx = 1; - return matchArgsWithParams(ArgIdx, FD, CE); - } - // Match arg-to-param passing (in CXXConstructExpr) for any pointer type - // argument: - if (const auto *CCE = dyn_cast<CXXConstructExpr>(S)) { - return matchArgsWithParams(/*ArgIdxStart=*/0, CCE->getConstructor(), - CCE); - } - if (const auto *RS = dyn_cast<ReturnStmt>(S)) { - const Expr *RetExpr = RS->getRetValue(); - 
if (!hasPtrOrArrType(RetExpr)) - return llvm::Error::success(); - return addEdges(toEPL(RootDecl, true), toEPL(RetExpr)); - } - } - - if (const Decl *D = DynNode.get<Decl>()) { - const Expr *InitExpr = nullptr; - - if (const auto *VD = dyn_cast<ValueDecl>(D)) { - if (const auto *Var = dyn_cast<VarDecl>(VD)) - InitExpr = Var->getInit(); - if (const auto *Fd = dyn_cast<FieldDecl>(VD)) - InitExpr = Fd->getInClassInitializer(); - - // Match initializer-list: - if (auto *InitLst = dyn_cast_or_null<InitListExpr>(InitExpr)) - return matchInitializerList(VD, InitLst); - - // Match initializers to variables/fields of a pointer type: - if (InitExpr && hasPtrOrArrType(VD)) - return addEdges(toEPL(VD), toEPL(InitExpr)); - } - - // Match C++ constructor member-initializers: - if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D)) { - for (auto *E : CtorD->inits()) { - if (E->isDelegatingInitializer()) - return matches(DynTypedNode::create(*E->getInit()), RootDecl); - if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(FD)) { - if (auto Err = addEdges(toEPL(E->getMember()), toEPL(E->getInit()))) - return Err; - } - } - return llvm::Error::success(); - } - } - return llvm::Error::success(); - } + llvm::Error matchInitializerList(const ValueDecl *Base, const Expr *InitExpr, + unsigned ArrayElementIndirectLevel = 0); + llvm::Error matches(const DynTypedNode &DynNode, const NamedDecl *RootDecl); private: - ASTContext &Ctx; std::function<EntityId(const EntityName &)> AddEntity; - Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N, - bool IsRet = false) { - auto Ret = createEntityPointerLevel(N, AddEntity, IsRet); + Expected<EntityPointerLevelSet> toEPL(const NamedDecl *N, bool IsRet = false); - if (Ret) - return EntityPointerLevelSet{*Ret}; - return Ret.takeError(); - } - - Expected<EntityPointerLevelSet> toEPL(const Expr *N) { - return translateEntityPointerLevel(N, Ctx, AddEntity); - } + Expected<EntityPointerLevelSet> toEPL(const Expr *N); llvm::Error 
addEdges(Expected<EntityPointerLevelSet> &&LHS, - Expected<EntityPointerLevelSet> &&RHS) { - if (!LHS && !RHS) - return llvm::joinErrors(LHS.takeError(), RHS.takeError()); - if (!LHS) - return LHS.takeError(); - if (!RHS) - return RHS.takeError(); - for (auto L : *LHS) - Results[L].insert(RHS->begin(), RHS->end()); - return llvm::Error::success(); - } + Expected<EntityPointerLevelSet> &&RHS); template <typename ParmsProvider, typename ArgsProvider> llvm::Error matchArgsWithParams(unsigned ArgIdxStart, ParmsProvider *PP, @@ -265,6 +73,214 @@ private: return llvm::Error::success(); } }; + +Expected<EntityPointerLevelSet> +PointerFlowMatcher::toEPL(const NamedDecl *N, bool IsRet) { + auto Ret = createEntityPointerLevel(N, AddEntity, IsRet); + + if (Ret) + return EntityPointerLevelSet{*Ret}; + return Ret.takeError(); +} + +Expected<EntityPointerLevelSet> PointerFlowMatcher::toEPL(const Expr *N) { + return translateEntityPointerLevel(N, Ctx, AddEntity); +} + +llvm::Error +PointerFlowMatcher::addEdges(Expected<EntityPointerLevelSet> &&LHS, + Expected<EntityPointerLevelSet> &&RHS) { + if (!LHS && !RHS) + return llvm::joinErrors(LHS.takeError(), RHS.takeError()); + if (!LHS) + return LHS.takeError(); + if (!RHS) + return RHS.takeError(); + for (auto L : *LHS) + Results[L].insert(RHS->begin(), RHS->end()); + return llvm::Error::success(); +} + +/// Match and extract assignments. +/// The extraction function 'XF' can be described by the following rules: +/// +/// XF(l = r) := add edge "toEPL(l) -> toEPL(r))" +/// XF(foo(a, b, ...)) := XF(Param_1 = a), XF(Param_2 = b), ... +/// XF(return e;) := XF(Fun = e), where 'Fun' is the enclosing +/// function +/// XF(ctor(a, ...) : x1(y1), ... {...}) +/// := XF(Param_1 = a), ..., +/// XF(x1 = y1), ..., +/// ctor's body will be visited separately. 
+/// XF(T var = e) := XF(Var = e) +/// XF(T var = init-list) := see \ref +/// PointerAssignmentMatcher::matchInitializerList +llvm::Error PointerFlowMatcher::matches(const DynTypedNode &DynNode, + const NamedDecl *RootDecl) { + if (const Stmt *S = DynNode.get<Stmt>()) { + // Match 'p = q' whenever it has pointer or array type: + if (const auto *BO = dyn_cast<BinaryOperator>(S); + BO && BO->getOpcode() == BO_Assign && hasPtrOrArrType(BO)) { + return addEdges(toEPL(BO->getLHS()), toEPL(BO->getRHS())); + } + + // Match arg-to-param passing (in CallExpr) for any pointer type argument: + if (const auto *CE = dyn_cast<CallExpr>(S)) { + const FunctionDecl *FD = CE->getDirectCallee(); + + if (!FD) + return llvm::Error::success(); + + unsigned ArgIdx = 0; + + if (isa<CXXOperatorCallExpr>(CE)) + if (auto *MD = dyn_cast<CXXMethodDecl>(FD); + MD && !MD->isExplicitObjectMemberFunction()) + ArgIdx = 1; + return matchArgsWithParams(ArgIdx, FD, CE); + } + // Match arg-to-param passing (in CXXConstructExpr) for any pointer type + // argument: + if (const auto *CCE = dyn_cast<CXXConstructExpr>(S)) { + return matchArgsWithParams(/*ArgIdxStart=*/0, CCE->getConstructor(), CCE); + } + if (const auto *RS = dyn_cast<ReturnStmt>(S)) { + const Expr *RetExpr = RS->getRetValue(); + if (!hasPtrOrArrType(RetExpr)) + return llvm::Error::success(); + return addEdges(toEPL(RootDecl, true), toEPL(RetExpr)); + } + } + + if (const Decl *D = DynNode.get<Decl>()) { + const Expr *InitExpr = nullptr; + + if (const auto *VD = dyn_cast<ValueDecl>(D)) { + if (const auto *Var = dyn_cast<VarDecl>(VD)) + InitExpr = Var->getInit(); + if (const auto *Fd = dyn_cast<FieldDecl>(VD)) + InitExpr = Fd->getInClassInitializer(); + + // Match initializer-list: + if (auto *InitLst = dyn_cast_or_null<InitListExpr>(InitExpr)) + return matchInitializerList(VD, InitLst); + + // Match initializers to variables/fields of a pointer type: + if (InitExpr && hasPtrOrArrType(VD)) + return addEdges(toEPL(VD), toEPL(InitExpr)); + } + 
+ // Match C++ constructor member-initializers: + if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D)) { + for (auto *E : CtorD->inits()) { + if (E->isDelegatingInitializer()) + return matches(DynTypedNode::create(*E->getInit()), RootDecl); + if (const FieldDecl *FD = E->getMember(); FD && hasPtrOrArrType(FD)) { + if (auto Err = addEdges(toEPL(E->getMember()), toEPL(E->getInit()))) + return Err; + } + } + return llvm::Error::success(); + } + } + return llvm::Error::success(); +} + +// Helper function for matchInitializerList that handles record: +llvm::Error matchInitializerListForRecordDecl(PointerFlowMatcher &Matcher, + const RecordDecl *RecordTy, + const InitListExpr *ILE) { + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RecordTy)) + if (CXXRD->getNumBases() != 0) { + // FIXME: support this: + return makeErrAtNode( + Matcher.Ctx, ILE, + "attempt to create pointer assignment edges between " + "CXXRecordDecls with base classes and initializer-lists"); + } + // Handle union: + if (RecordTy->isUnion()) { + auto *InitField = ILE->getInitializedFieldInUnion(); + + if (!InitField) + return llvm::Error::success(); + assert(!ILE->inits().empty()); + return Matcher.matchInitializerList(InitField, ILE->getInit(0)); + } + // Handle struct/class: + ILE = ILE->isSemanticForm() ? 
ILE : ILE->getSemanticForm(); + + auto FieldIter = RecordTy->field_begin(); + + assert(RecordTy->getNumFields() >= ILE->getNumInits()); + for (auto *Init : ILE->inits()) + if (auto Err = Matcher.matchInitializerList(*(FieldIter++), Init)) + return Err; + return llvm::Error::success(); +} + +// Helper function for matchInitializerList that handles array: +llvm::Error matchInitializerListForArray(PointerFlowMatcher &Matcher, + const ValueDecl *Array, + const InitListExpr *ILE, + unsigned ArrayIndirectLevel = 0) { + for (auto *E : ILE->inits()) + if (auto Err = + Matcher.matchInitializerList(Array, E, ArrayIndirectLevel + 1)) + return Err; + return llvm::Error::success(); +} + +/// Match initializer lists of the form 'Var = {a, b, c, ...}': +/// +/// If 'Var' is a struct/union: +/// XF(Var = {a, b, c, ...}) := XF(Var.field_1 = a) +/// XF(Var.field_2 = b) +/// ... +/// If 'Var' is an array: +/// XF(Var = {a, b, c, ...}) := XF(*Var = a) +/// XF(*Var = b) +/// ... +/// +/// The process is recursive: 'a', 'b', 'c', ... may themselves be +/// initializer lists. We therefore use \p ArrayElementIndirectLevel to keep +/// track of the pointer level the left-hand side. 
+llvm::Error PointerFlowMatcher::matchInitializerList( + const ValueDecl *Base, const Expr *InitExpr, + unsigned ArrayElementIndirectLevel) { + const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr); + + if (!ILE) { + if (!hasPtrOrArrType(InitExpr)) + return llvm::Error::success(); + + auto BaseEPL = toEPL(Base); + + if (!BaseEPL) + return BaseEPL.takeError(); + + // Apply ArrayElementIndirectLevel to BaseEPL + auto R = llvm::map_range( + *BaseEPL, [&ArrayElementIndirectLevel](const EntityPointerLevel &EPL) { + EntityPointerLevel Result = EPL; + for (unsigned I = 0; I < ArrayElementIndirectLevel; ++I) + Result = incrementPointerLevel(Result); + return Result; + }); + return addEdges(EntityPointerLevelSet{R.begin(), R.end()}, toEPL(InitExpr)); + } + // Note that `Base`'s type is NOT the real LHS type when + // ArrayElementIndirectLevel > 0: + QualType Type = InitExpr->getType(); + + if (auto *RD = Type->getAsRecordDecl()) + return matchInitializerListForRecordDecl(*this, RD, ILE); + if (Type->isArrayType()) + return matchInitializerListForArray(*this, Base, ILE, + ArrayElementIndirectLevel); + // Must be the case of using a initializer-list for a scalar: + return matchInitializerList(Base, ILE->getInit(0)); +} } // namespace namespace clang::ssaf { @@ -279,7 +295,7 @@ public: Expected<std::unique_ptr<PointerFlowEntitySummary>> extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx) { - PointerAssignmentMatcher Matcher( + PointerFlowMatcher Matcher( Ctx, [this](const EntityName &EN) { return addEntity(EN); }); auto MatchAction = [&Matcher, &Contributor](const DynTypedNode &Node) { auto Err = Matcher.matches(Node, Contributor); diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp index 21c8d195fb70..d35cfa626deb 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp +++ 
b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp @@ -10,6 +10,7 @@ #include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" #include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" @@ -52,11 +53,7 @@ summaryToJSON(const EntitySummary &ES, EdgesEntryData.push_back(entityPointerLevelToJSON(RHS, EntityId2JSON)); EdgesData.push_back(Value(std::move(EdgesEntryData))); } - - Object Data; - - Data[PointerFlowKey] = Value(std::move(EdgesData)); - return Data; + return Object{{PointerFlowKey, Value(std::move(EdgesData))}}; } static llvm::Expected<std::unique_ptr<EntitySummary>> @@ -82,13 +79,11 @@ summaryFromJSON(const Object &Data, EntityIdTable &, EdgesEntryData, "a JSON array of EntityPointerLevels with a size " "greater than 1: [lhs, rhs, rhs, ...]"); - auto SrcEPL = - entityPointerLevelFromJSON(*EPLArray->begin(), EntityIdFromJSON); + auto SrcEPL = entityPointerLevelFromJSON((*EPLArray)[0], EntityIdFromJSON); if (!SrcEPL) return SrcEPL.takeError(); - for (const auto &EPLData : - llvm::make_range(EPLArray->begin() + 1, EPLArray->end())) { + for (const auto &EPLData : llvm::drop_begin(*EPLArray)) { auto EPL = entityPointerLevelFromJSON(EPLData, EntityIdFromJSON); if (!EPL) return EPL.takeError(); @@ -99,11 +94,13 @@ summaryFromJSON(const Object &Data, EntityIdTable &, buildPointerFlowEntitySummary(std::move(Edges))); } -struct PointerFlowJSONFormatInfo : JSONFormat::FormatInfo { +namespace { +struct PointerFlowJSONFormatInfo final : JSONFormat::FormatInfo { PointerFlowJSONFormatInfo() : JSONFormat::FormatInfo(PointerFlowEntitySummary::summaryName(), summaryToJSON, summaryFromJSON) {} }; +} // namespace static 
llvm::Registry<JSONFormat::FormatInfo>::Add<PointerFlowJSONFormatInfo> RegisterPointerFlowJSONFormatInfo( diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp index 88ae6f72e6c0..ce1ef0b3ba69 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp @@ -68,11 +68,13 @@ deserialize(const Object &Data, EntityIdTable &, return deserializeImpl(Data, Fn); } -struct UnsafeBufferUsageJSONFormatInfo : JSONFormat::FormatInfo { +namespace { +struct UnsafeBufferUsageJSONFormatInfo final : JSONFormat::FormatInfo { UnsafeBufferUsageJSONFormatInfo() : JSONFormat::FormatInfo(UnsafeBufferUsageEntitySummary::summaryName(), serialize, deserialize) {} }; +} // namespace static llvm::Registry<JSONFormat::FormatInfo>::Add< UnsafeBufferUsageJSONFormatInfo> |
