aboutsummaryrefslogtreecommitdiff
path: root/clang-tools-extra/clangd/support/Markup.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang-tools-extra/clangd/support/Markup.cpp')
-rw-r--r--clang-tools-extra/clangd/support/Markup.cpp357
1 files changed, 318 insertions, 39 deletions
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 63aff96..a130830 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -6,12 +6,12 @@
//
//===----------------------------------------------------------------------===//
#include "support/Markup.h"
+#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <iterator>
@@ -64,8 +64,8 @@ bool looksLikeTag(llvm::StringRef Contents) {
// It's always safe to escape punctuation, but want minimal escaping.
// The strategy is to escape the first character of anything that might start
// a markdown grammar construct.
-bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
- bool StartsLine) {
+bool needsLeadingEscapePlaintext(char C, llvm::StringRef Before,
+ llvm::StringRef After, bool StartsLine) {
assert(Before.take_while(llvm::isSpace).empty());
auto RulerLength = [&]() -> /*Length*/ unsigned {
if (!StartsLine || !Before.empty())
@@ -151,16 +151,94 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
}
}
-/// Escape a markdown text block. Ensures the punctuation will not introduce
+/// \brief Tests whether \p C should be backslash-escaped in markdown.
+///
+/// The MarkupContent LSP specification defines that `markdown` content needs to
+/// follow GFM (GitHub Flavored Markdown) rules, and we can assume that markdown
+/// is rendered on the client side. This means we do not need to escape any
+/// markdown constructs.
+/// The only exception is when the client does not support HTML rendering in
+/// markdown. In that case, we need to escape HTML tags and HTML entities.
+///
+/// **FIXME:** handle the case when the client does support HTML rendering in
+/// markdown. For this, the LSP server needs to check the
+/// [supportsHtml
+/// capability](https://github.com/microsoft/language-server-protocol/issues/1344)
+/// of the client.
+///
+/// \param C The character to check.
+/// \param After The string that follows \p C .
+/// This is used to determine if \p C is part of a tag or an entity reference.
+///
+/// \returns true if \p C is '<' or '&' and should be escaped, false otherwise.
+bool needsLeadingEscapeMarkdown(char C, llvm::StringRef After) {
+ switch (C) {
+ case '<': // HTML tag (or autolink, which we choose not to escape)
+ return looksLikeTag(After);
+ case '&': { // HTML entity reference
+ auto End = After.find(';'); // a reference is only recognised if a ';' follows
+ if (End == llvm::StringRef::npos)
+ return false;
+ llvm::StringRef Content = After.substr(0, End); // text between '&' and the first ';'
+ if (Content.consume_front("#")) { // numeric character reference
+ if (Content.consume_front("x") || Content.consume_front("X")) // hexadecimal form
+ return llvm::all_of(Content, llvm::isHexDigit);
+ return llvm::all_of(Content, llvm::isDigit); // decimal form
+ }
+ return llvm::all_of(Content, llvm::isAlpha); // named form: letters only
+ }
+ default:
+ return false;
+ }
+}
+
+bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
+ bool StartsLine, bool EscapeMarkdown) { // dispatches to one of the two predicates
+ if (EscapeMarkdown) // markdown punctuation itself must be neutralised
+ return needsLeadingEscapePlaintext(C, Before, After, StartsLine);
+ return needsLeadingEscapeMarkdown(C, After); // otherwise only '<' and '&' are examined
+}
+
+/// Escape a markdown text block.
+/// If \p EscapeMarkdown is true it ensures the punctuation will not introduce
/// any of the markdown constructs.
-std::string renderText(llvm::StringRef Input, bool StartsLine) {
+/// Else, markdown syntax is not escaped, only HTML tags and entities.
+///
+/// The input is processed line by line: the leading whitespace of each line is
+/// copied through untouched, and every following character is prefixed with a
+/// backslash when needsLeadingEscape() asks for it.
+///
+/// \param Input The raw text; may contain '\n'.
+/// \param StartsLine Whether the first line of \p Input begins an output line.
+/// Every later line is always treated as starting a line.
+/// \param EscapeMarkdown Selects the escaping predicate, see above.
+/// \returns The escaped text.
+std::string renderText(llvm::StringRef Input, bool StartsLine,
+ bool EscapeMarkdown) {
std::string R;
- for (unsigned I = 0; I < Input.size(); ++I) {
- if (needsLeadingEscape(Input[I], Input.substr(0, I), Input.substr(I + 1),
- StartsLine))
- R.push_back('\\');
- R.push_back(Input[I]);
+ R.reserve(Input.size());
+
+ // Split the input into lines, and escape each line separately.
+ llvm::StringRef Line, Rest;
+
+ bool IsFirstLine = true;
+
+ for (std::tie(Line, Rest) = Input.split('\n');
+ !(Line.empty() && Rest.empty());
+ std::tie(Line, Rest) = Rest.split('\n')) {
+
+ bool StartsLineIntern = IsFirstLine ? StartsLine : true;
+
+ // Ignore leading spaces for the escape logic, but preserve them in the
+ // output.
+ StringRef LeadingSpaces = Line.take_while(llvm::isSpace);
+ if (!LeadingSpaces.empty()) {
+ R.append(LeadingSpaces);
+ }
+
+ for (unsigned I = LeadingSpaces.size(); I < Line.size(); ++I) {
+ // `Before` must be exactly the text between the leading whitespace and
+ // Line[I]. slice() takes [Start, End); substr() takes a length and would
+ // run past I on indented lines, hiding line-start constructs.
+ if (needsLeadingEscape(Line[I], Line.slice(LeadingSpaces.size(), I),
+ Line.substr(I + 1), StartsLineIntern,
+ EscapeMarkdown))
+ R.push_back('\\');
+ R.push_back(Line[I]);
+ }
+
+ IsFirstLine = false;
+ if (!Rest.empty())
+ R.push_back('\n');
}
+
return R;
}
@@ -168,6 +246,7 @@ std::string renderText(llvm::StringRef Input, bool StartsLine) {
/// is surrounded by backticks and the inner contents are properly escaped.
std::string renderInlineBlock(llvm::StringRef Input) {
std::string R;
+ R.reserve(Input.size());
// Double all backticks to make sure we don't close the inline block early.
for (size_t From = 0; From < Input.size();) {
size_t Next = Input.find("`", From);
@@ -261,6 +340,9 @@ std::string renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children,
// https://github.com/microsoft/vscode/issues/88416 for details.
class Ruler : public Block {
public:
+ void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
+ renderMarkdown(OS); // the escaped rendering reuses the regular markdown output
+ }
void renderMarkdown(llvm::raw_ostream &OS) const override {
// Note that we need an extra new line before the ruler, otherwise we might
// make previous block a title instead of introducing a ruler.
@@ -275,6 +357,9 @@ public:
class CodeBlock : public Block {
public:
+ void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
+ renderMarkdown(OS); // the escaped rendering reuses the regular markdown output
+ }
void renderMarkdown(llvm::raw_ostream &OS) const override {
std::string Marker = getMarkerForCodeBlock(Contents);
// No need to pad from previous blocks, as they should end with a new line.
@@ -303,11 +388,13 @@ private:
std::string indentLines(llvm::StringRef Input) {
assert(!Input.ends_with("\n") && "Input should've been trimmed.");
std::string IndentedR;
- // We'll add 2 spaces after each new line.
+ // We'll add 2 spaces after each new line which is not followed by another new
+ // line.
IndentedR.reserve(Input.size() + Input.count('\n') * 2);
- for (char C : Input) {
+ for (size_t I = 0; I < Input.size(); ++I) {
+ char C = Input[I];
IndentedR += C;
- if (C == '\n')
+ if (C == '\n' && (((I + 1) < Input.size()) && (Input[I + 1] != '\n')))
IndentedR.append(" ");
}
return IndentedR;
@@ -316,17 +403,34 @@ std::string indentLines(llvm::StringRef Input) {
class Heading : public Paragraph {
public:
Heading(size_t Level) : Level(Level) {}
+
+ void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
+ insertHeadingMarkers(OS); // heading prefix first, then the paragraph body
+ Paragraph::renderEscapedMarkdown(OS);
+ }
+
void renderMarkdown(llvm::raw_ostream &OS) const override {
- OS << std::string(Level, '#') << ' ';
+ insertHeadingMarkers(OS); // same prefix as the escaped variant
Paragraph::renderMarkdown(OS);
}
private:
size_t Level;
+
+ void insertHeadingMarkers(llvm::raw_ostream &OS) const { // shared by both markdown renderers
+ OS << std::string(Level, '#') << ' '; // Level '#' characters followed by one space
+ }
};
} // namespace
+std::string Block::asEscapedMarkdown() const { // renders this block into a string
+ std::string R;
+ llvm::raw_string_ostream OS(R);
+ renderEscapedMarkdown(OS);
+ return llvm::StringRef(OS.str()).trim().str(); // surrounding whitespace is trimmed
+}
+
std::string Block::asMarkdown() const {
std::string R;
llvm::raw_string_ostream OS(R);
@@ -341,6 +445,35 @@ std::string Block::asPlainText() const {
return llvm::StringRef(OS.str()).trim().str();
}
+void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
+ bool NeedsSpace = false; // set when the previous chunk requested a space after it
+ bool HasChunks = false; // false only while the first chunk is being rendered
+ for (auto &C : Chunks) {
+ if (C.SpaceBefore || NeedsSpace)
+ OS << " ";
+ switch (C.Kind) {
+ case ChunkKind::PlainText:
+ OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
+ break;
+ case ChunkKind::InlineCode:
+ OS << renderInlineBlock(C.Contents);
+ break;
+ case ChunkKind::Bold:
+ OS << renderText("**" + C.Contents + "**", !HasChunks, // NOTE(review): the markers go through the escaper too; confirm intended
+ /*EscapeMarkdown=*/true);
+ break;
+ case ChunkKind::Emphasized:
+ OS << renderText("*" + C.Contents + "*", !HasChunks, // NOTE(review): same as Bold, marker is part of the escaped input
+ /*EscapeMarkdown=*/true);
+ break;
+ }
+ HasChunks = true;
+ NeedsSpace = C.SpaceAfter;
+ }
+ // A paragraph in markdown is separated by a blank line.
+ OS << "\n\n";
+}
+
void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
bool NeedsSpace = false;
bool HasChunks = false;
@@ -348,20 +481,26 @@ void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
if (C.SpaceBefore || NeedsSpace)
OS << " ";
switch (C.Kind) {
- case Chunk::PlainText:
- OS << renderText(C.Contents, !HasChunks);
+ case ChunkKind::PlainText:
+ OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
break;
- case Chunk::InlineCode:
+ case ChunkKind::InlineCode:
OS << renderInlineBlock(C.Contents);
break;
+ case ChunkKind::Bold:
+ OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
+ << "**";
+ break;
+ case ChunkKind::Emphasized:
+ OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
+ << "*";
+ break;
}
HasChunks = true;
NeedsSpace = C.SpaceAfter;
}
- // Paragraphs are translated into markdown lines, not markdown paragraphs.
- // Therefore it only has a single linebreak afterwards.
- // VSCode requires two spaces at the end of line to start a new one.
- OS << " \n";
+ // A paragraph in markdown is separated by a blank line.
+ OS << "\n\n";
}
std::unique_ptr<Block> Paragraph::clone() const {
@@ -370,8 +509,8 @@ std::unique_ptr<Block> Paragraph::clone() const {
/// Choose a marker to delimit `Text` from a prioritized list of options.
/// This is more readable than escaping for plain-text.
-llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
- llvm::StringRef Text) {
+llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
+ llvm::StringRef Text) const {
// Prefer a delimiter whose characters don't appear in the text.
for (llvm::StringRef S : Options)
if (Text.find_first_of(S) == llvm::StringRef::npos)
@@ -379,31 +518,147 @@ llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
return Options.front();
}
+bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
+ constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt"; // characters treated as ending a line
+
+ Line = Line.rtrim(); // trailing whitespace does not count as the last character
+ return !Line.empty() && Punctuation.contains(Line.back()); // an empty line never indicates a break
+}
+
+bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
+ // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
+ // '#' headings, '`' code blocks. Trailing spaces are checked in isHardLineBreakAfter.
+ constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
+
+ Rest = Rest.ltrim(" \t"); // only spaces and tabs are skipped, not newlines
+ if (Rest.empty())
+ return false;
+
+ if (LinebreakIndicators.contains(Rest.front()))
+ return true;
+
+ if (llvm::isDigit(Rest.front())) { // digits followed by '.' or ')', e.g. an ordered list item
+ llvm::StringRef AfterDigit = Rest.drop_while(llvm::isDigit);
+ if (AfterDigit.starts_with(".") || AfterDigit.starts_with(")"))
+ return true;
+ }
+ return false;
+}
+
+bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
+ llvm::StringRef Rest) const {
+ // In Markdown, 2 spaces before a line break force a line break.
+ // Add a line break for plaintext in this case too.
+ // FIXME: should we also consider whether Line is short?
+ return Line.ends_with(" ") || punctuationIndicatesLineBreak(Line) ||
+ isHardLineBreakIndicator(Rest); // Rest is the text after Line's newline
+}
+
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
bool NeedsSpace = false;
+ std::string ConcatenatedText;
+ ConcatenatedText.reserve(EstimatedStringSize); // size hint accumulated by the append* methods
+
+ llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
+
for (auto &C : Chunks) {
+
+ if (C.Kind == ChunkKind::PlainText) {
+ if (C.SpaceBefore || NeedsSpace)
+ ConcatenatedOS << ' ';
+
+ ConcatenatedOS << C.Contents;
+ NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter; // assumes Contents is non-empty; appendChunk rejects empty input (verify other writers)
+ continue;
+ }
+
if (C.SpaceBefore || NeedsSpace)
- OS << " ";
+ ConcatenatedOS << ' ';
llvm::StringRef Marker = "";
- if (C.Preserve && C.Kind == Chunk::InlineCode)
+ if (C.Preserve && C.Kind == ChunkKind::InlineCode)
Marker = chooseMarker({"`", "'", "\""}, C.Contents);
- OS << Marker << C.Contents << Marker;
+ else if (C.Kind == ChunkKind::Bold)
+ Marker = "**";
+ else if (C.Kind == ChunkKind::Emphasized)
+ Marker = "*";
+ ConcatenatedOS << Marker << C.Contents << Marker;
NeedsSpace = C.SpaceAfter;
}
- OS << '\n';
+
+ // We go through the contents line by line to handle the newlines
+ // and required spacing correctly.
+ //
+ // Newlines are added if:
+ // - the line ends with 2 spaces and a newline follows
+ // - the line ends with punctuation that indicates a line break (.:,;!?)
+ // - the next line starts with a hard line break indicator (-*@\>#`, or a
+ // digit run followed by '.' or ')'), ignoring leading spaces and tabs.
+ //
+ // Otherwise, newlines in the input are replaced with a single space.
+ //
+ // Multiple spaces are collapsed into a single space.
+ //
+ // Lines containing only whitespace are ignored.
+ llvm::StringRef Line, Rest;
+
+ for (std::tie(Line, Rest) =
+ llvm::StringRef(ConcatenatedText).trim().split('\n');
+ !(Line.empty() && Rest.empty());
+ std::tie(Line, Rest) = Rest.split('\n')) {
+
+ // Remove lines which only contain whitespace.
+ //
+ // Note: this also handles the case when there are multiple newlines
+ // in a row, since all leading newlines are removed.
+ //
+ // The documentation parsing treats multiple newlines as paragraph
+ // separators, hence it will create a new Paragraph instead of adding
+ // multiple newlines to the same Paragraph.
+ // Therefore multiple newlines are never added to a paragraph
+ // except if the user explicitly adds them using
+ // e.g. appendText("user text\n\nnext text").
+ Line = Line.ltrim();
+ if (Line.empty())
+ continue;
+
+ OS << canonicalizeSpaces(Line);
+
+ if (isHardLineBreakAfter(Line, Rest))
+ OS << '\n';
+ else if (!Rest.empty())
+ // Since we removed any trailing whitespace from the input using trim(),
+ // we know that the next line contains non-whitespace characters.
+ // Therefore, we can add a space without worrying about trailing spaces.
+ OS << ' ';
+ }
+
+ // Paragraphs are separated by a blank line.
+ OS << "\n\n";
}
BulletList::BulletList() = default;
BulletList::~BulletList() = default;
+void BulletList::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
+ for (auto &D : Items) {
+ std::string M = D.asEscapedMarkdown(); // render the item once, then indent it below
+ // Instead of doing this we might prefer passing Indent to children to get
+ // rid of the copies, if it turns out to be a bottleneck.
+ OS << "- " << indentLines(M) << '\n'; // one "- " bullet per item
+ }
+ // We add 2 newlines after the list to terminate it in markdown.
+ OS << "\n\n";
+}
+
void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
for (auto &D : Items) {
+ std::string M = D.asMarkdown(); // render the item once, then indent it below
// Instead of doing this we might prefer passing Indent to children to get
// rid of the copies, if it turns out to be a bottleneck.
- OS << "- " << indentLines(D.asMarkdown()) << '\n';
+ OS << "- " << indentLines(M) << '\n'; // one "- " bullet per item
}
- // We need a new line after list to terminate it in markdown.
- OS << '\n';
+ // We add 2 newlines after the list to terminate it in markdown.
+ OS << "\n\n";
}
void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
@@ -412,6 +667,7 @@ void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
// rid of the copies, if it turns out to be a bottleneck.
OS << "- " << indentLines(D.asPlainText()) << '\n';
}
+ OS << '\n';
}
Paragraph &Paragraph::appendSpace() {
@@ -420,32 +676,51 @@ Paragraph &Paragraph::appendSpace() {
return *this;
}
-Paragraph &Paragraph::appendText(llvm::StringRef Text) {
- std::string Norm = canonicalizeSpaces(Text);
- if (Norm.empty())
+Paragraph &Paragraph::appendChunk(llvm::StringRef Contents, ChunkKind K) { // adds one chunk of kind K
+ if (Contents.empty()) // empty input never creates a chunk
return *this;
Chunks.emplace_back();
Chunk &C = Chunks.back();
- C.Contents = std::move(Norm);
- C.Kind = Chunk::PlainText;
- C.SpaceBefore = llvm::isSpace(Text.front());
- C.SpaceAfter = llvm::isSpace(Text.back());
+ C.Contents = Contents; // stored as given; no whitespace normalisation here
+ C.Kind = K;
+
+ EstimatedStringSize += Contents.size(); // running size hint, used for reserve() in renderPlainText
return *this;
}
+/// Appends \p Text to the paragraph as plain text.
+///
+/// If the last chunk is already plain text, \p Text is concatenated onto it
+/// instead of starting a new chunk; otherwise a new plain-text chunk is
+/// created via appendChunk(). The text is stored as given.
+///
+/// \returns *this, to allow chaining.
+Paragraph &Paragraph::appendText(llvm::StringRef Text) {
+ if (!Chunks.empty() && Chunks.back().Kind == ChunkKind::PlainText) {
+ Chunks.back().Contents += Text;
+ // Keep the reserve() hint used by renderPlainText in sync; appendChunk
+ // and appendCode account for their contents the same way.
+ EstimatedStringSize += Text.size();
+ return *this;
+ }
+
+ return appendChunk(Text, ChunkKind::PlainText);
+}
+
+Paragraph &Paragraph::appendEmphasizedText(llvm::StringRef Text) { // adds an Emphasized chunk
+ return appendChunk(canonicalizeSpaces(std::move(Text)), // presumably collapses whitespace runs; helper defined elsewhere
+ ChunkKind::Emphasized);
+}
+
+Paragraph &Paragraph::appendBoldText(llvm::StringRef Text) { // adds a Bold chunk
+ return appendChunk(canonicalizeSpaces(std::move(Text)), ChunkKind::Bold); // presumably collapses whitespace runs; helper defined elsewhere
+}
+
Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
bool AdjacentCode =
- !Chunks.empty() && Chunks.back().Kind == Chunk::InlineCode;
+ !Chunks.empty() && Chunks.back().Kind == ChunkKind::InlineCode;
std::string Norm = canonicalizeSpaces(std::move(Code));
if (Norm.empty())
return *this;
+ EstimatedStringSize += Norm.size(); // running size hint, used for reserve() in renderPlainText
Chunks.emplace_back();
Chunk &C = Chunks.back();
C.Contents = std::move(Norm);
- C.Kind = Chunk::InlineCode;
+ C.Kind = ChunkKind::InlineCode;
C.Preserve = Preserve;
// Disallow adjacent code spans without spaces, markdown can't render them.
C.SpaceBefore = AdjacentCode;
+
return *this;
}
@@ -482,6 +757,10 @@ void Document::addCodeBlock(std::string Code, std::string Language) {
std::make_unique<CodeBlock>(std::move(Code), std::move(Language)));
}
+std::string Document::asEscapedMarkdown() const { // whole document, escaped-markdown flavour
+ return renderBlocks(Children, &Block::renderEscapedMarkdown); // same driver as asMarkdown, different per-block renderer
+}
+
std::string Document::asMarkdown() const {
return renderBlocks(Children, &Block::renderMarkdown);
}