aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
diff options
context:
space:
mode:
authorZixu Wang <zixu_wang@apple.com>2022-03-21 00:53:28 -0700
committerZixu Wang <zixu_wang@apple.com>2022-03-22 13:21:57 -0700
commit89f6b26f1beb2c1344f5cfeb34e405128544c76b (patch)
tree06b93d32cadc4163d407cc6ac7374b32c311a44d /clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
parent57d02900b54bf162ec476da2ce2bd893dcdbe24b (diff)
downloadllvm-89f6b26f1beb2c1344f5cfeb34e405128544c76b.zip
llvm-89f6b26f1beb2c1344f5cfeb34e405128544c76b.tar.gz
llvm-89f6b26f1beb2c1344f5cfeb34e405128544c76b.tar.bz2
[clang][extract-api] Refactor ExtractAPI and improve docs
- The name SymbolGraph is inappropriate and confusing for the new library for clang-extract-api. Refactor and rename things to make it clear that ExtractAPI is the core functionality and SymbolGraph is one serializer for the API information. - Add documentation comments to ExtractAPI classes and methods to improve readability and clearness of the ExtractAPI work. Differential Revision: https://reviews.llvm.org/D122160
Diffstat (limited to 'clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp')
-rw-r--r--clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp450
1 files changed, 450 insertions, 0 deletions
diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
new file mode 100644
index 0000000..b0bc203
--- /dev/null
+++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
@@ -0,0 +1,450 @@
+//===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the SymbolGraphSerializer.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
+#include "clang/Basic/Version.h"
+#include "clang/ExtractAPI/API.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VersionTuple.h"
+
+using namespace clang;
+using namespace clang::extractapi;
+using namespace llvm;
+using namespace llvm::json;
+
+namespace {
+
+/// Store the JSON object \p Obj in \p Paren under \p Key.
+///
+/// Nothing is written when \p Obj holds no value, so callers can forward the
+/// result of an optional serializer directly.
+void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) {
+  if (!Obj)
+    return;
+  Paren[Key] = std::move(*Obj);
+}
+
+/// Store the JSON array \p Array in \p Paren under \p Key.
+///
+/// Nothing is written when \p Array holds no value.
+void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) {
+  if (!Array)
+    return;
+  Paren[Key] = std::move(*Array);
+}
+
+/// Convert the \c VersionTuple \p V into a Symbol Graph semantic version.
+///
+/// The resulting object carries three numeric fields: \c major, \c minor and
+/// \c patch. A missing minor or subminor component is emitted as 0.
+/// For example version tuple 1.0.3 becomes:
+/// \code
+///   {
+///     "major" : 1,
+///     "minor" : 0,
+///     "patch" : 3
+///   }
+/// \endcode
+///
+/// \returns \c None if \p V is empty, otherwise an \c Object holding the
+/// semantic version representation of \p V.
+Optional<Object> serializeSemanticVersion(const VersionTuple &V) {
+  if (V.empty())
+    return None;
+
+  return Object{{"major", V.getMajor()},
+                {"minor", V.getMinor().getValueOr(0)},
+                {"patch", V.getSubminor().getValueOr(0)}};
+}
+
+/// Build the \c operatingSystem part of the Symbol Graph platform property.
+///
+/// The object always has the OS \c name taken from \p T. A \c minimumVersion
+/// semantic version is added only when the minimum supported OS version of
+/// \p T is non-empty.
+Object serializeOperatingSystem(const Triple &T) {
+  Object OS{{"name", T.getOSTypeName(T.getOS())}};
+  if (auto MinimumVersion =
+          serializeSemanticVersion(T.getMinimumSupportedOSVersion()))
+    OS["minimumVersion"] = std::move(*MinimumVersion);
+  return OS;
+}
+
+/// Build the platform object of the Symbol Graph module section.
+///
+/// The target triple \p T is described by three fields: \c architecture,
+/// \c vendor, and \c operatingSystem.
+Object serializePlatform(const Triple &T) {
+  return Object{{"architecture", T.getArchName()},
+                {"vendor", T.getVendorName()},
+                {"operatingSystem", serializeOperatingSystem(T)}};
+}
+
+/// Build a source position object (\c line and \c character) for \p Loc.
+///
+/// \param Loc The presumed location to serialize; must be valid (asserted).
+/// \param IncludeFileURI If true, also emit a \c uri field holding the file
+/// name of \p Loc prefixed with "file://". Defaults to false.
+Object serializeSourcePosition(const PresumedLoc &Loc,
+                               bool IncludeFileURI = false) {
+  assert(Loc.isValid() && "invalid source position");
+
+  Object SourcePosition{{"line", Loc.getLine()},
+                        {"character", Loc.getColumn()}};
+  if (!IncludeFileURI)
+    return SourcePosition;
+
+  // URIs use forward slashes, so normalize the native path separators.
+  SourcePosition["uri"] =
+      "file://" + sys::path::convert_to_slash(Loc.getFilename());
+  return SourcePosition;
+}
+
+/// Build a source range object whose \c start and \c end fields are the
+/// serialized positions of \p BeginLoc and \p EndLoc (without file URIs).
+Object serializeSourceRange(const PresumedLoc &BeginLoc,
+                            const PresumedLoc &EndLoc) {
+  return Object{{"start", serializeSourcePosition(BeginLoc)},
+                {"end", serializeSourcePosition(EndLoc)}};
+}
+
+/// Build the availability object for a symbol from \p Avail.
+///
+/// The introduced, deprecated, and obsoleted versions are emitted as semantic
+/// versions whenever they are non-empty. Two boolean flags are added when set:
+/// \c isUnconditionallyUnavailable and \c isUnconditionallyDeprecated,
+/// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)).
+///
+/// \returns \c None if \p Avail reports default availability, otherwise an
+/// \c Object containing the formatted availability information.
+Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) {
+  if (Avail.isDefault())
+    return None;
+
+  Object Result;
+  auto AddVersion = [&Result](StringRef Key, const VersionTuple &Version) {
+    serializeObject(Result, Key, serializeSemanticVersion(Version));
+  };
+  AddVersion("introducedVersion", Avail.Introduced);
+  AddVersion("deprecatedVersion", Avail.Deprecated);
+  AddVersion("obsoletedVersion", Avail.Obsoleted);
+
+  // NOTE(review): this flag is driven by isUnavailable(), not
+  // isUnconditionallyUnavailable() -- confirm that is intended.
+  if (Avail.isUnavailable())
+    Result["isUnconditionallyUnavailable"] = true;
+  if (Avail.isUnconditionallyDeprecated())
+    Result["isUnconditionallyDeprecated"] = true;
+
+  return Result;
+}
+
+/// Map the language selected by \p LangOpts to the short name used in Symbol
+/// Graph interface language references.
+///
+/// Only C ("c") and Objective-C ("objc") are handled; every other language
+/// kind reaches \c llvm_unreachable.
+StringRef getLanguageName(const LangOptions &LangOpts) {
+  const auto &Standard = LangStandard::getLangStandardForKind(LangOpts.LangStd);
+  switch (Standard.getLanguage()) {
+  // Languages that the frontend cannot parse and compile
+  case Language::Unknown:
+  case Language::Asm:
+  case Language::LLVM_IR:
+
+  // Unsupported language currently
+  case Language::CXX:
+  case Language::ObjCXX:
+  case Language::OpenCL:
+  case Language::OpenCLCXX:
+  case Language::CUDA:
+  case Language::RenderScript:
+  case Language::HIP:
+    llvm_unreachable("Unsupported language kind");
+
+  case Language::ObjC:
+    return "objc";
+  case Language::C:
+    return "c";
+  }
+
+  llvm_unreachable("Unhandled language kind");
+}
+
+/// Build the Symbol Graph \c identifier object for \p Record.
+///
+/// The object holds the record's USR under \c precise, for precise and unique
+/// references, and the short interface language name derived from
+/// \p LangOpts under \c interfaceLanguage.
+Object serializeIdentifier(const APIRecord &Record,
+                           const LangOptions &LangOpts) {
+  return Object{{"precise", Record.USR},
+                {"interfaceLanguage", getLanguageName(LangOpts)}};
+}
+
+/// Serialize the documentation comments attached to a symbol, as specified by
+/// the Symbol Graph format.
+///
+/// The Symbol Graph \c docComment object contains an array of lines. Each line
+/// represents one line of stripped documentation comment, with source range
+/// information.
+/// e.g.
+/// \code
+///   /// This is a documentation comment
+///       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'  First line.
+///   ///     with multiple lines.
+///       ^~~~~~~~~~~~~~~~~~~~~~~'         Second line.
+/// \endcode
+///
+/// \returns \c None if \p Comment is empty, or an \c Object containing the
+/// formatted lines.
+Optional<Object> serializeDocComment(const DocComment &Comment) {
+  if (Comment.empty())
+    return None;
+
+  Array LinesArray;
+  for (const auto &CommentLine : Comment) {
+    Object Line;
+    Line["text"] = CommentLine.Text;
+    serializeObject(Line, "range",
+                    serializeSourceRange(CommentLine.Begin, CommentLine.End));
+    LinesArray.emplace_back(std::move(Line));
+  }
+
+  // Named Result (not DocComment) so the local does not shadow the type name.
+  Object Result;
+  // Hand the array over by move; passing the lvalue would deep-copy every
+  // line into the by-value Optional<Array> parameter.
+  serializeArray(Result, "lines", std::move(LinesArray));
+
+  return Result;
+}
+
+/// Convert the declaration fragments \p DF into a Symbol Graph array.
+///
+/// Each fragment is a tagged piece of a symbol's declaration. Joined in order
+/// the fragments reproduce the declaration text, and the tags are useful for
+/// purposes such as syntax highlighting. For example:
+/// \code
+///   const int pi; -> "declarationFragments" : [
+///                      {
+///                        "kind" : "keyword",
+///                        "spelling" : "const"
+///                      },
+///                      {
+///                        "kind" : "text",
+///                        "spelling" : " "
+///                      },
+///                      {
+///                        "kind" : "typeIdentifier",
+///                        "preciseIdentifier" : "c:I",
+///                        "spelling" : "int"
+///                      },
+///                      {
+///                        "kind" : "text",
+///                        "spelling" : " "
+///                      },
+///                      {
+///                        "kind" : "identifier",
+///                        "spelling" : "pi"
+///                      }
+///                    ]
+/// \endcode
+///
+/// \returns \c None if \p DF has no fragments, otherwise an \c Array with one
+/// object per fragment.
+Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) {
+  const auto &Pieces = DF.getFragments();
+  if (Pieces.empty())
+    return None;
+
+  Array Serialized;
+  for (const auto &Piece : Pieces) {
+    Object Entry{
+        {"spelling", Piece.Spelling},
+        {"kind", DeclarationFragments::getFragmentKindString(Piece.Kind)}};
+    // The precise identifier is optional and only emitted when present.
+    if (!Piece.PreciseIdentifier.empty())
+      Entry["preciseIdentifier"] = Piece.PreciseIdentifier;
+    Serialized.emplace_back(std::move(Entry));
+  }
+
+  return Serialized;
+}
+
+/// Build the Symbol Graph function signature object for \p FS.
+///
+/// The object may contain two arrays:
+///   - \c returns : the declaration fragments of the return type;
+///   - \c parameters : one entry per parameter holding its \c name and its
+///     \c declarationFragments.
+/// Either array is omitted when it would be empty.
+///
+/// \returns \c None if \p FS is empty, otherwise an \c Object containing the
+/// formatted function signature.
+Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) {
+  if (FS.empty())
+    return None;
+
+  Object Signature;
+  if (auto Returns = serializeDeclarationFragments(FS.getReturnType()))
+    Signature["returns"] = std::move(*Returns);
+
+  Array ParamList;
+  for (const auto &Param : FS.getParameters()) {
+    Object Entry{{"name", Param.Name}};
+    serializeArray(Entry, "declarationFragments",
+                   serializeDeclarationFragments(Param.Fragments));
+    ParamList.emplace_back(std::move(Entry));
+  }
+  if (!ParamList.empty())
+    Signature["parameters"] = std::move(ParamList);
+
+  return Signature;
+}
+
+/// Build the Symbol Graph \c names object for \p Record.
+///
+/// The names object offers several representations of a symbol for different
+/// applications:
+///   - \c title : The simple declared name of the symbol;
+///   - \c subHeading : An array of declaration fragments that provides tags,
+///     and potentially more tokens (for example the \c +/- symbol for
+///     Objective-C methods). Can be used as sub-headings for documentation.
+///     Omitted when the record has no sub-heading fragments.
+Object serializeNames(const APIRecord &Record) {
+  Object Names{{"title", Record.Name}};
+  if (auto SubHeading = serializeDeclarationFragments(Record.SubHeading))
+    Names["subHeading"] = std::move(*SubHeading);
+  return Names;
+}
+
+/// Serialize the symbol kind information.
+///
+/// The Symbol Graph symbol kind property contains a shorthand \c identifier
+/// which is prefixed by the source language name, useful for tooling to parse
+/// the kind, and a \c displayName for rendering human-readable names.
+///
+/// \returns An \c Object with \c identifier and \c displayName set for global
+/// functions and variables. The object is left empty for a global record
+/// whose kind is \c GVKind::Unknown.
+Object serializeSymbolKind(const APIRecord &Record,
+                           const LangOptions &LangOpts) {
+  Object Kind;
+  switch (Record.getKind()) {
+  case APIRecord::RK_Global: {
+    // The kind tag was just checked, so use the asserting cast<> rather than
+    // dereferencing an unchecked dyn_cast<> result. The braces give the
+    // declaration its own scope inside the case label.
+    const auto &GR = cast<GlobalRecord>(Record);
+    switch (GR.GlobalKind) {
+    case GVKind::Function:
+      Kind["identifier"] = (getLanguageName(LangOpts) + ".func").str();
+      Kind["displayName"] = "Function";
+      break;
+    case GVKind::Variable:
+      Kind["identifier"] = (getLanguageName(LangOpts) + ".var").str();
+      Kind["displayName"] = "Global Variable";
+      break;
+    case GVKind::Unknown:
+      // Unknown global kind
+      break;
+    }
+    break;
+  }
+  }
+
+  return Kind;
+}
+
+} // namespace
+
+/// Out-of-line definition of the empty \c anchor method.
+/// NOTE(review): presumably a virtual method defined here to pin the class's
+/// vtable to this translation unit, per LLVM convention -- confirm in header.
+void SymbolGraphSerializer::anchor() {}
+
+/// The Symbol Graph format version (0.5.3) emitted by SymbolGraphSerializer.
+const VersionTuple SymbolGraphSerializer::FormatVersion(/*Major=*/0,
+                                                        /*Minor=*/5,
+                                                        /*Subminor=*/3);
+
+/// Build the \c metadata section: the emitted \c formatVersion as a semantic
+/// version and the full Clang version string as \c generator.
+Object SymbolGraphSerializer::serializeMetadata() const {
+  Object Metadata{{"generator", clang::getClangFullVersion()}};
+  if (auto Version = serializeSemanticVersion(FormatVersion))
+    Metadata["formatVersion"] = std::move(*Version);
+  return Metadata;
+}
+
+/// Build the \c module section: a (currently empty) module \c name and the
+/// \c platform description of the API set's target triple.
+Object SymbolGraphSerializer::serializeModule() const {
+  // FIXME: We might not be building a module, some Clang-based languages might
+  // not have a "module" concept. Figure out a way to provide a name to
+  // describe the API set.
+  return Object{{"name", ""},
+                {"platform", serializePlatform(API.getTarget())}};
+}
+
+/// Decide whether \p Record should be left out of the serialized output.
+///
+/// \returns true only for symbols that are unconditionally unavailable.
+bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const {
+  return Record.Availability.isUnconditionallyUnavailable();
+}
+
+/// Serialize the properties shared by every API record.
+///
+/// Emits the \c identifier, \c kind, \c names and \c location of \p Record.
+/// The \c availability, \c docComment and \c declarationFragments fields are
+/// added only when the corresponding data is present.
+///
+/// \returns \c None if \p Record should be skipped (see \c shouldSkip),
+/// otherwise the serialized symbol object.
+Optional<Object>
+SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const {
+  if (shouldSkip(Record))
+    return None;
+
+  Object Obj;
+  serializeObject(Obj, "identifier",
+                  serializeIdentifier(Record, API.getLangOpts()));
+  serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts()));
+  serializeObject(Obj, "names", serializeNames(Record));
+  serializeObject(
+      Obj, "location",
+      serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true));
+  // The key was previously misspelled "availbility", which consumers looking
+  // for "availability" would not find.
+  serializeObject(Obj, "availability",
+                  serializeAvailability(Record.Availability));
+  serializeObject(Obj, "docComment", serializeDocComment(Record.Comment));
+  serializeArray(Obj, "declarationFragments",
+                 serializeDeclarationFragments(Record.Declaration));
+
+  return Obj;
+}
+
+/// Serialize the global record \p Record and append it to \c Symbols.
+///
+/// Records rejected by \c serializeAPIRecord are dropped. For functions, the
+/// signature is attached as well when it is non-empty.
+void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) {
+  auto Obj = serializeAPIRecord(Record);
+  if (!Obj)
+    return;
+
+  // The Symbol Graph format names this mixin "functionSignature". The old
+  // "parameters" key produced a nested "parameters"."parameters" layout.
+  if (Record.GlobalKind == GVKind::Function)
+    serializeObject(*Obj, "functionSignature",
+                    serializeFunctionSignature(Record.Signature));
+
+  Symbols.emplace_back(std::move(*Obj));
+}
+
+/// Serialize the whole API set into the Symbol Graph root object.
+///
+/// The root contains the \c metadata and \c module sections, followed by the
+/// \c symbols and \c relationships arrays. The accumulated \c Symbols and
+/// \c Relationships arrays are moved into the returned object.
+Object SymbolGraphSerializer::serialize() {
+  Object Root;
+  serializeObject(Root, "metadata", serializeMetadata());
+  serializeObject(Root, "module", serializeModule());
+
+  // Serialize global records in the API set.
+  for (const auto &Global : API.getGlobals())
+    serializeGlobalRecord(*Global.second);
+
+  Root["symbols"] = std::move(Symbols);
+  // The key was previously misspelled "relationhips".
+  Root["relationships"] = std::move(Relationships);
+
+  return Root;
+}
+
+/// Serialize the API set and write the resulting JSON, followed by a newline,
+/// to \p os.
+///
+/// The output is compact when \c Options.Compact is set, and pretty-printed
+/// with an indentation of 2 otherwise.
+void SymbolGraphSerializer::serialize(raw_ostream &os) {
+  const char *Format = Options.Compact ? "{0}" : "{0:2}";
+  os << formatv(Format, Value(serialize())) << "\n";
+}