aboutsummaryrefslogtreecommitdiff
path: root/clang-tools-extra/clang-doc/MDParser.h
diff options
context:
space:
mode:
Diffstat (limited to 'clang-tools-extra/clang-doc/MDParser.h')
-rw-r--r--clang-tools-extra/clang-doc/MDParser.h99
1 files changed, 99 insertions, 0 deletions
diff --git a/clang-tools-extra/clang-doc/MDParser.h b/clang-tools-extra/clang-doc/MDParser.h
new file mode 100644
index 0000000..32599ea
--- /dev/null
+++ b/clang-tools-extra/clang-doc/MDParser.h
@@ -0,0 +1,99 @@
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MD_PARSER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MD_PARSER_H
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/StringSaver.h"
+#include <list>
+
+using namespace llvm;
+
+namespace clang {
+namespace doc {
+using llvm::SmallString;
+enum class MDState { Emphasis, Strong, None };
+
+enum class MDType {
+ Paragraph,
+ Emphasis,
+ Strong,
+ Text,
+ Softbreak,
+};
+
+enum class MDTokenType { LeftDelimiterRun, RightDelimiterRun, Text };
+
+struct Node {
+ SmallVector<Node*> Children;
+ MDType Type;
+ Node *Parent;
+ std::string Content;
+};
+
+struct DelimiterContext {
+ bool RightFlanking;
+ bool LeftFlanking;
+ bool CanOpen;
+ bool CanClose;
+ char DelimChar;
+ // Since Content is a StringRef, we separately track the length so that we can
+ // decrement when necessary without modifying the string.
+ size_t Length;
+};
+
+/// A LineNode might be a valid delimiter run, text, or a delimiter run that
+/// will later be merged with a text if there is no matching run e.g. ***foo.
+/// @brief A preprocessing structure for tracking text in a line.
+struct LineNode {
+ StringRef Content;
+ // Instantiated if the line is a delimiter run.
+ std::optional<DelimiterContext> DelimiterContext;
+};
+
+class MarkdownParser {
+ // MDState State;
+ BumpPtrAllocator Arena;
+ StringSaver Saver;
+
+ /// If a delimiter is found, determine if it is a delimiter run, what type of
+ /// run it is, and whether it can be an opener or closer.
+ ///
+ /// The CommonMark specification defines delimiter runs as:
+ /// A delimiter run is either a sequence of one or more * or _ characters that
+ /// is not preceded or followed by a non-backslash-escaped * or _ character
+ ///
+ /// A left-flanking delimiter run is a delimiter run that is (1) not followed
+ /// by Unicode whitespace, and either (2a) not followed by a Unicode
+ /// punctuation character, or (2b) followed by a Unicode punctuation character
+ /// and preceded by Unicode whitespace or a Unicode punctuation character.
+ ///
+ /// A right-flanking delimiter run is a delimiter run that is (1) not preceded
+ /// by Unicode whitespace, and either (2a) not preceded by a Unicode
+ /// punctuation character, or (2b) preceded by a Unicode punctuation character
+ /// and followed by Unicode whitespace or a Unicode punctuation character.
+ ///
+ /// @param IdxOrigin the index of * or _ that might start a delimiter run.
+ /// @return A pair denoting the type of run and the index where the run stops
+ std::pair<std::optional<DelimiterContext>, size_t>
+ processDelimiters(SmallString<64> &Line, const size_t &Origin = 0);
+
+ void parseLine(SmallString<64> &Line, Node *Current);
+ std::list<Node *> processEmphasis(std::list<LineNode *> &Stack);
+ void convertToNode(LineNode LN, Node *Parent);
+
+ Node *reverseIterateLine(std::list<LineNode *> &Stack,
+ std::list<LineNode *>::iterator &It);
+
+ Node *createTextNode(const std::list<LineNode *> &Text);
+
+ std::string traverse(Node *Current);
+
+ /// @param Lines An entire Document that resides in a comment.
+ /// @return the root of a Markdown document.
+ Node* parse(std::vector<SmallString<64>> &Lines);
+public:
+ MarkdownParser() : Arena(BumpPtrAllocator()), Saver(Arena) {}
+ std::string render(std::vector<SmallString<64>> &Lines);
+};
+} // namespace doc
+} // namespace clang
+#endif