//===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "NoRecursionCheck.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Analysis/CallGraph.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/SCCIterator.h" using namespace clang::ast_matchers; namespace clang { namespace tidy { namespace misc { namespace { /// Much like SmallSet, with two differences: /// 1. It can *only* be constructed from an ArrayRef<>. If the element count /// is small, there is no copy and said storage *must* outlive us. /// 2. it is immutable, the way it was constructed it will stay. template class ImmutableSmallSet { ArrayRef Vector; llvm::DenseSet Set; static_assert(SmallSize <= 32, "N should be small"); bool isSmall() const { return Set.empty(); } public: using size_type = size_t; ImmutableSmallSet() = delete; ImmutableSmallSet(const ImmutableSmallSet &) = delete; ImmutableSmallSet(ImmutableSmallSet &&) = delete; T &operator=(const ImmutableSmallSet &) = delete; T &operator=(ImmutableSmallSet &&) = delete; // WARNING: Storage *must* outlive us if we decide that the size is small. ImmutableSmallSet(ArrayRef Storage) { // Is size small-enough to just keep using the existing storage? if (Storage.size() <= SmallSize) { Vector = Storage; return; } // We've decided that it isn't performant to keep using vector. // Let's migrate the data into Set. Set.reserve(Storage.size()); Set.insert(Storage.begin(), Storage.end()); } /// count - Return 1 if the element is in the set, 0 otherwise. size_type count(const T &V) const { if (isSmall()) { // Since the collection is small, just do a linear search. return llvm::find(Vector, V) == Vector.end() ? 0 : 1; } return Set.count(V); } }; /// Much like SmallSetVector, but with one difference: /// when the size is \p SmallSize or less, when checking whether an element is /// already in the set or not, we perform linear search over the vector, /// but if the size is larger than \p SmallSize, we look in set. /// FIXME: upstream this into SetVector/SmallSetVector itself. template class SmartSmallSetVector { public: using size_type = size_t; private: SmallVector Vector; llvm::DenseSet Set; static_assert(SmallSize <= 32, "N should be small"); // Are we still using Vector for uniqness tracking? bool isSmall() const { return Set.empty(); } // Will one more entry cause Vector to switch away from small-size storage? bool entiretyOfVectorSmallSizeIsOccupied() const { assert(isSmall() && Vector.size() <= SmallSize && "Shouldn't ask if we have already [should have] migrated into Set."); return Vector.size() == SmallSize; } void populateSet() { assert(Set.empty() && "Should not have already utilized the Set."); // Magical growth factor prediction - to how many elements do we expect to // sanely grow after switching away from small-size storage? const size_t NewMaxElts = 4 * Vector.size(); Vector.reserve(NewMaxElts); Set.reserve(NewMaxElts); Set.insert(Vector.begin(), Vector.end()); } /// count - Return 1 if the element is in the set, 0 otherwise. size_type count(const T &V) const { if (isSmall()) { // Since the collection is small, just do a linear search. return llvm::find(Vector, V) == Vector.end() ? 0 : 1; } // Look-up in the Set. return Set.count(V); } bool setInsert(const T &V) { if (count(V) != 0) return false; // Already exists. // Does not exist, Can/need to record it. if (isSmall()) { // Are we still using Vector for uniqness tracking? // Will one more entry fit within small-sized Vector? if (!entiretyOfVectorSmallSizeIsOccupied()) return true; // We'll insert into vector right afterwards anyway. // Time to switch to Set. populateSet(); } // Set time! // Note that this must be after `populateSet()` might have been called. bool SetInsertionSucceeded = Set.insert(V).second; (void)SetInsertionSucceeded; assert(SetInsertionSucceeded && "We did check that no such value existed"); return true; } public: /// Insert a new element into the SmartSmallSetVector. /// \returns true if the element was inserted into the SmartSmallSetVector. bool insert(const T &X) { bool Result = setInsert(X); if (Result) Vector.push_back(X); return Result; } /// Clear the SmartSmallSetVector and return the underlying vector. decltype(Vector) takeVector() { Set.clear(); return std::move(Vector); } }; constexpr unsigned SmallCallStackSize = 16; constexpr unsigned SmallSCCSize = 32; using CallStackTy = llvm::SmallVector; // In given SCC, find *some* call stack that will be cyclic. // This will only find *one* such stack, it might not be the smallest one, // and there may be other loops. CallStackTy pathfindSomeCycle(ArrayRef SCC) { // We'll need to be able to performantly look up whether some CallGraphNode // is in SCC or not, so cache all the SCC elements in a set. const ImmutableSmallSet SCCElts(SCC); // Is node N part if the current SCC? auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) { return SCCElts.count(N) != 0; }; // Track the call stack that will cause a cycle. SmartSmallSetVector CallStackSet; // Arbitrarily take the first element of SCC as entry point. CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr); // Continue recursing into subsequent callees that are part of this SCC, // and are thus known to be part of the call graph loop, until loop forms. CallGraphNode::CallRecord *Node = &EntryNode; while (true) { // Did we see this node before? if (!CallStackSet.insert(*Node)) break; // Cycle completed! Note that didn't insert the node into stack! // Else, perform depth-first traversal: out of all callees, pick first one // that is part of this SCC. This is not guaranteed to yield shortest cycle. Node = llvm::find_if(Node->Callee->callees(), NodeIsPartOfSCC); } // Note that we failed to insert the last node, that completes the cycle. // But we really want to have it. So insert it manually into stack only. CallStackTy CallStack = CallStackSet.takeVector(); CallStack.emplace_back(*Node); return CallStack; } } // namespace void NoRecursionCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(translationUnitDecl().bind("TUDecl"), this); } void NoRecursionCheck::handleSCC(ArrayRef SCC) { assert(!SCC.empty() && "Empty SCC does not make sense."); // First of all, call out every strongly connected function. for (CallGraphNode *N : SCC) { FunctionDecl *D = N->getDefinition(); diag(D->getLocation(), "function %0 is within a recursive call chain") << D; } // Now, SCC only tells us about strongly connected function declarations in // the call graph. It doesn't *really* tell us about the cycles they form. // And there may be more than one cycle in SCC. // So let's form a call stack that eventually exposes *some* cycle. const CallStackTy EventuallyCyclicCallStack = pathfindSomeCycle(SCC); assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle"); // While last node of the call stack does cause a loop, due to the way we // pathfind the cycle, the loop does not necessarily begin at the first node // of the call stack, so drop front nodes of the call stack until it does. const auto CyclicCallStack = ArrayRef(EventuallyCyclicCallStack) .drop_until([LastNode = EventuallyCyclicCallStack.back()]( CallGraphNode::CallRecord FrontNode) { return FrontNode == LastNode; }); assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames"); // Which function we decided to be the entry point that lead to the recursion? FunctionDecl *CycleEntryFn = CyclicCallStack.front().Callee->getDefinition(); // And now, for ease of understanding, let's print the call sequence that // forms the cycle in question. diag(CycleEntryFn->getLocation(), "example recursive call chain, starting from function %0", DiagnosticIDs::Note) << CycleEntryFn; for (int CurFrame = 1, NumFrames = CyclicCallStack.size(); CurFrame != NumFrames; ++CurFrame) { CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1]; CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame]; Decl *PrevDecl = PrevNode.Callee->getDecl(); Decl *CurrDecl = CurrNode.Callee->getDecl(); diag(CurrNode.CallExpr->getBeginLoc(), "Frame #%0: function %1 calls function %2 here:", DiagnosticIDs::Note) << CurFrame << cast(PrevDecl) << cast(CurrDecl); } diag(CyclicCallStack.back().CallExpr->getBeginLoc(), "... which was the starting point of the recursive call chain; there " "may be other cycles", DiagnosticIDs::Note); } void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) { // Build call graph for the entire translation unit. const auto *TU = Result.Nodes.getNodeAs("TUDecl"); CallGraph CG; CG.addToCallGraph(const_cast(TU)); // Look for cycles in call graph, // by looking for Strongly Connected Components (SCC's) for (llvm::scc_iterator SCCI = llvm::scc_begin(&CG), SCCE = llvm::scc_end(&CG); SCCI != SCCE; ++SCCI) { if (!SCCI.hasCycle()) // We only care about cycles, not standalone nodes. continue; handleSCC(*SCCI); } } } // namespace misc } // namespace tidy } // namespace clang