diff options
author | Zixu Wang <zixu_wang@apple.com> | 2022-03-21 00:53:28 -0700 |
---|---|---|
committer | Zixu Wang <zixu_wang@apple.com> | 2022-03-22 13:21:57 -0700 |
commit | 89f6b26f1beb2c1344f5cfeb34e405128544c76b (patch) | |
tree | 06b93d32cadc4163d407cc6ac7374b32c311a44d /clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp | |
parent | 57d02900b54bf162ec476da2ce2bd893dcdbe24b (diff) | |
download | llvm-89f6b26f1beb2c1344f5cfeb34e405128544c76b.zip llvm-89f6b26f1beb2c1344f5cfeb34e405128544c76b.tar.gz llvm-89f6b26f1beb2c1344f5cfeb34e405128544c76b.tar.bz2 |
[clang][extract-api] Refactor ExtractAPI and improve docs
- The name SymbolGraph is inappropriate and confusing for the new library
for clang-extract-api. Refactor and rename things to make it clear that
ExtractAPI is the core functionality and SymbolGraph is one serializer
for the API information.
- Add documentation comments to ExtractAPI classes and methods to improve
readability and clearness of the ExtractAPI work.
Differential Revision: https://reviews.llvm.org/D122160
Diffstat (limited to 'clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp')
-rw-r--r-- | clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp | 450 |
1 files changed, 450 insertions, 0 deletions
diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp new file mode 100644 index 0000000..b0bc203 --- /dev/null +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -0,0 +1,450 @@ +//===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the SymbolGraphSerializer. +/// +//===----------------------------------------------------------------------===// + +#include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" +#include "clang/Basic/Version.h" +#include "clang/ExtractAPI/API.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VersionTuple.h" + +using namespace clang; +using namespace clang::extractapi; +using namespace llvm; +using namespace llvm::json; + +namespace { + +/// Helper function to inject a JSON object \p Obj into another object \p Paren +/// at position \p Key. +void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) { + if (Obj) + Paren[Key] = std::move(Obj.getValue()); +} + +/// Helper function to inject a JSON array \p Array into object \p Paren at +/// position \p Key. +void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) { + if (Array) + Paren[Key] = std::move(Array.getValue()); +} + +/// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version +/// format. +/// +/// A semantic version object contains three numeric fields, representing the +/// \c major, \c minor, and \c patch parts of the version tuple. +/// For example version tuple 1.0.3 is serialized as: +/// \code +/// { +/// "major" : 1, +/// "minor" : 0, +/// "patch" : 3 +/// } +/// \endcode +/// +/// \returns \c None if the version \p V is empty, or an \c Object containing +/// the semantic version representation of \p V. +Optional<Object> serializeSemanticVersion(const VersionTuple &V) { + if (V.empty()) + return None; + + Object Version; + Version["major"] = V.getMajor(); + Version["minor"] = V.getMinor().getValueOr(0); + Version["patch"] = V.getSubminor().getValueOr(0); + return Version; +} + +/// Serialize the OS information in the Symbol Graph platform property. +/// +/// The OS information in Symbol Graph contains the \c name of the OS, and an +/// optional \c minimumVersion semantic version field. +Object serializeOperatingSystem(const Triple &T) { + Object OS; + OS["name"] = T.getOSTypeName(T.getOS()); + serializeObject(OS, "minimumVersion", + serializeSemanticVersion(T.getMinimumSupportedOSVersion())); + return OS; +} + +/// Serialize the platform information in the Symbol Graph module section. +/// +/// The platform object describes a target platform triple in corresponding +/// three fields: \c architecture, \c vendor, and \c operatingSystem. +Object serializePlatform(const Triple &T) { + Object Platform; + Platform["architecture"] = T.getArchName(); + Platform["vendor"] = T.getVendorName(); + Platform["operatingSystem"] = serializeOperatingSystem(T); + return Platform; +} + +/// Serialize a source location in file. +/// +/// \param Loc The presumed location to serialize. +/// \param IncludeFileURI If true, include the file path of \p Loc as a URI. +/// Defaults to false. +Object serializeSourcePosition(const PresumedLoc &Loc, + bool IncludeFileURI = false) { + assert(Loc.isValid() && "invalid source position"); + + Object SourcePosition; + SourcePosition["line"] = Loc.getLine(); + SourcePosition["character"] = Loc.getColumn(); + + if (IncludeFileURI) { + std::string FileURI = "file://"; + // Normalize file path to use forward slashes for the URI. + FileURI += sys::path::convert_to_slash(Loc.getFilename()); + SourcePosition["uri"] = FileURI; + } + + return SourcePosition; +} + +/// Serialize a source range with begin and end locations. +Object serializeSourceRange(const PresumedLoc &BeginLoc, + const PresumedLoc &EndLoc) { + Object SourceRange; + serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc)); + serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc)); + return SourceRange; +} + +/// Serialize the availability attributes of a symbol. +/// +/// Availability information contains the introduced, deprecated, and obsoleted +/// versions of the symbol as semantic versions, if not default. +/// Availability information also contains flags to indicate if the symbol is +/// unconditionally unavailable or deprecated, +/// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)). +/// +/// \returns \c None if the symbol has default availability attributes, or +/// an \c Object containing the formatted availability information. +Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) { + if (Avail.isDefault()) + return None; + + Object Availbility; + serializeObject(Availbility, "introducedVersion", + serializeSemanticVersion(Avail.Introduced)); + serializeObject(Availbility, "deprecatedVersion", + serializeSemanticVersion(Avail.Deprecated)); + serializeObject(Availbility, "obsoletedVersion", + serializeSemanticVersion(Avail.Obsoleted)); + if (Avail.isUnavailable()) + Availbility["isUnconditionallyUnavailable"] = true; + if (Avail.isUnconditionallyDeprecated()) + Availbility["isUnconditionallyDeprecated"] = true; + + return Availbility; +} + +/// Get the short language name string for interface language references. +StringRef getLanguageName(const LangOptions &LangOpts) { + auto Language = + LangStandard::getLangStandardForKind(LangOpts.LangStd).getLanguage(); + switch (Language) { + case Language::C: + return "c"; + case Language::ObjC: + return "objc"; + + // Unsupported language currently + case Language::CXX: + case Language::ObjCXX: + case Language::OpenCL: + case Language::OpenCLCXX: + case Language::CUDA: + case Language::RenderScript: + case Language::HIP: + + // Languages that the frontend cannot parse and compile + case Language::Unknown: + case Language::Asm: + case Language::LLVM_IR: + llvm_unreachable("Unsupported language kind"); + } + + llvm_unreachable("Unhandled language kind"); +} + +/// Serialize the identifier object as specified by the Symbol Graph format. +/// +/// The identifier property of a symbol contains the USR for precise and unique +/// references, and the interface language name. +Object serializeIdentifier(const APIRecord &Record, + const LangOptions &LangOpts) { + Object Identifier; + Identifier["precise"] = Record.USR; + Identifier["interfaceLanguage"] = getLanguageName(LangOpts); + + return Identifier; +} + +/// Serialize the documentation comments attached to a symbol, as specified by +/// the Symbol Graph format. +/// +/// The Symbol Graph \c docComment object contains an array of lines. Each line +/// represents one line of striped documentation comment, with source range +/// information. +/// e.g. +/// \code +/// /// This is a documentation comment +/// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' First line. +/// /// with multiple lines. +/// ^~~~~~~~~~~~~~~~~~~~~~~' Second line. +/// \endcode +/// +/// \returns \c None if \p Comment is empty, or an \c Object containing the +/// formatted lines. +Optional<Object> serializeDocComment(const DocComment &Comment) { + if (Comment.empty()) + return None; + + Object DocComment; + Array LinesArray; + for (const auto &CommentLine : Comment) { + Object Line; + Line["text"] = CommentLine.Text; + serializeObject(Line, "range", + serializeSourceRange(CommentLine.Begin, CommentLine.End)); + LinesArray.emplace_back(std::move(Line)); + } + serializeArray(DocComment, "lines", LinesArray); + + return DocComment; +} + +/// Serialize the declaration fragments of a symbol. +/// +/// The Symbol Graph declaration fragments is an array of tagged important +/// parts of a symbol's declaration. The fragments sequence can be joined to +/// form spans of declaration text, with attached information useful for +/// purposes like syntax-highlighting etc. For example: +/// \code +/// const int pi; -> "declarationFragments" : [ +/// { +/// "kind" : "keyword", +/// "spelling" : "const" +/// }, +/// { +/// "kind" : "text", +/// "spelling" : " " +/// }, +/// { +/// "kind" : "typeIdentifier", +/// "preciseIdentifier" : "c:I", +/// "spelling" : "int" +/// }, +/// { +/// "kind" : "text", +/// "spelling" : " " +/// }, +/// { +/// "kind" : "identifier", +/// "spelling" : "pi" +/// } +/// ] +/// \endcode +/// +/// \returns \c None if \p DF is empty, or an \c Array containing the formatted +/// declaration fragments array. +Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) { + if (DF.getFragments().empty()) + return None; + + Array Fragments; + for (const auto &F : DF.getFragments()) { + Object Fragment; + Fragment["spelling"] = F.Spelling; + Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind); + if (!F.PreciseIdentifier.empty()) + Fragment["preciseIdentifier"] = F.PreciseIdentifier; + Fragments.emplace_back(std::move(Fragment)); + } + + return Fragments; +} + +/// Serialize the function signature field of a function, as specified by the +/// Symbol Graph format. +/// +/// The Symbol Graph function signature property contains two arrays. +/// - The \c returns array is the declaration fragments of the return type; +/// - The \c parameters array contains names and declaration fragments of the +/// parameters. +/// +/// \returns \c None if \p FS is empty, or an \c Object containing the +/// formatted function signature. +Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) { + if (FS.empty()) + return None; + + Object Signature; + serializeArray(Signature, "returns", + serializeDeclarationFragments(FS.getReturnType())); + + Array Parameters; + for (const auto &P : FS.getParameters()) { + Object Parameter; + Parameter["name"] = P.Name; + serializeArray(Parameter, "declarationFragments", + serializeDeclarationFragments(P.Fragments)); + Parameters.emplace_back(std::move(Parameter)); + } + + if (!Parameters.empty()) + Signature["parameters"] = std::move(Parameters); + + return Signature; +} + +/// Serialize the \c names field of a symbol as specified by the Symbol Graph +/// format. +/// +/// The Symbol Graph names field contains multiple representations of a symbol +/// that can be used for different applications: +/// - \c title : The simple declared name of the symbol; +/// - \c subHeading : An array of declaration fragments that provides tags, +/// and potentially more tokens (for example the \c +/- symbol for +/// Objective-C methods). Can be used as sub-headings for documentation. +Object serializeNames(const APIRecord &Record) { + Object Names; + Names["title"] = Record.Name; + serializeArray(Names, "subHeading", + serializeDeclarationFragments(Record.SubHeading)); + + return Names; +} + +/// Serialize the symbol kind information. +/// +/// The Symbol Graph symbol kind property contains a shorthand \c identifier +/// which is prefixed by the source language name, useful for tooling to parse +/// the kind, and a \c displayName for rendering human-readable names. +Object serializeSymbolKind(const APIRecord &Record, + const LangOptions &LangOpts) { + Object Kind; + switch (Record.getKind()) { + case APIRecord::RK_Global: + auto *GR = dyn_cast<GlobalRecord>(&Record); + switch (GR->GlobalKind) { + case GVKind::Function: + Kind["identifier"] = (getLanguageName(LangOpts) + ".func").str(); + Kind["displayName"] = "Function"; + break; + case GVKind::Variable: + Kind["identifier"] = (getLanguageName(LangOpts) + ".var").str(); + Kind["displayName"] = "Global Variable"; + break; + case GVKind::Unknown: + // Unknown global kind + break; + } + break; + } + + return Kind; +} + +} // namespace + +void SymbolGraphSerializer::anchor() {} + +/// Defines the format version emitted by SymbolGraphSerializer. +const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3}; + +Object SymbolGraphSerializer::serializeMetadata() const { + Object Metadata; + serializeObject(Metadata, "formatVersion", + serializeSemanticVersion(FormatVersion)); + Metadata["generator"] = clang::getClangFullVersion(); + return Metadata; +} + +Object SymbolGraphSerializer::serializeModule() const { + Object Module; + // FIXME: We might not be building a module, some Clang-based languages might + // not have a "module" concept. Figure out a way to provide a name to + // describe the API set. + Module["name"] = ""; + serializeObject(Module, "platform", serializePlatform(API.getTarget())); + return Module; +} + +bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { + // Skip unconditionally unavailable symbols + if (Record.Availability.isUnconditionallyUnavailable()) + return true; + + return false; +} + +Optional<Object> +SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const { + if (shouldSkip(Record)) + return None; + + Object Obj; + serializeObject(Obj, "identifier", + serializeIdentifier(Record, API.getLangOpts())); + serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts())); + serializeObject(Obj, "names", serializeNames(Record)); + serializeObject( + Obj, "location", + serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true)); + serializeObject(Obj, "availbility", + serializeAvailability(Record.Availability)); + serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); + serializeArray(Obj, "declarationFragments", + serializeDeclarationFragments(Record.Declaration)); + + return Obj; +} + +void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) { + auto Obj = serializeAPIRecord(Record); + if (!Obj) + return; + + if (Record.GlobalKind == GVKind::Function) + serializeObject(*Obj, "parameters", + serializeFunctionSignature(Record.Signature)); + + Symbols.emplace_back(std::move(*Obj)); +} + +Object SymbolGraphSerializer::serialize() { + Object Root; + serializeObject(Root, "metadata", serializeMetadata()); + serializeObject(Root, "module", serializeModule()); + + // Serialize global records in the API set. + for (const auto &Global : API.getGlobals()) + serializeGlobalRecord(*Global.second); + + Root["symbols"] = std::move(Symbols); + Root["relationhips"] = std::move(Relationships); + + return Root; +} + +void SymbolGraphSerializer::serialize(raw_ostream &os) { + Object root = serialize(); + if (Options.Compact) + os << formatv("{0}", Value(std::move(root))) << "\n"; + else + os << formatv("{0:2}", Value(std::move(root))) << "\n"; +} |