aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Format
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Format')
-rw-r--r--clang/lib/Format/BreakableToken.cpp114
-rw-r--r--clang/lib/Format/BreakableToken.h44
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp63
-rw-r--r--clang/lib/Format/FormatToken.h5
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp29
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp62
6 files changed, 292 insertions, 25 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index af1e074..954eeb9 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -292,6 +292,120 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex,
Prefix, InPPDirective, 1, StartColumn);
}
+BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
+ const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
+ unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableStringLiteral(
+ Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
+ : QuoteStyle == AtDoubleQuotes ? "@\""
+ : "\"",
+ /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
+ UnbreakableTailLength, InPPDirective, Encoding, Style),
+ BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
+ QuoteStyle(QuoteStyle) {
+ // Pick the replacement text for inserting braces, quotes, and line breaks.
+ // Every piece is a string literal rather than a string built from parts,
+ // because it has to outlive the BreakableStringLiteral object. The brace
+ // replacements include a quote so that WhitespaceManager can tell them apart
+ // from whitespace replacements between the string and surrounding tokens.
+
+ // BreakBeforeBinaryOperators is not implemented for JavaScript.
+ bool SignOnNewLine =
+ !Style.isJavaScript() &&
+ Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
+
+ if (Style.isVerilog()) {
+ // In Verilog, all strings are quoted by double quotes, joined by commas,
+ // and wrapped in braces. The comma is always before the newline.
+ assert(QuoteStyle == DoubleQuotes);
+ LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
+ RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
+ Postfix = "\",";
+ Prefix = "\"";
+ } else {
+ // The plus sign may be on either line. And also C# and JavaScript have
+ // several quoting styles.
+ if (QuoteStyle == SingleQuotes) {
+ LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
+ RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
+ Postfix = SignOnNewLine ? "'" : "' +";
+ Prefix = SignOnNewLine ? "+ '" : "'";
+ } else {
+ if (QuoteStyle == AtDoubleQuotes) {
+ LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
+ Prefix = SignOnNewLine ? "+ @\"" : "@\"";
+ } else {
+ LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
+ Prefix = SignOnNewLine ? "+ \"" : "\"";
+ }
+ RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
+ Postfix = SignOnNewLine ? "\"" : "\" +";
+ }
+ }
+
+ // Continuation lines are indented by the width of the brace and space, if any.
+ ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0; // -1 skips the quote or at sign.
+ // The plus sign may need to be unindented depending on the style.
+ // FIXME: Add support for DontAlign.
+ if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
+ Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
+ ContinuationIndent -= 2; // Leaves ContinuationIndent at -2, since BracesNeeded is false here.
+ }
+}
+
+unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
+ unsigned LineIndex, unsigned Offset, unsigned StartColumn) const { // LineIndex is not used.
+ return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) + // Tail, then RightBraceQuote or 1 column.
+ encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, // Width of the text from Offset on.
+ Style.TabWidth, Encoding);
+}
+
+unsigned
+BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
+ bool Break) const { // LineIndex is not used.
+ return std::max(
+ 0, // Clamp at 0: ContinuationIndent is negative when the plus sign is unindented.
+ static_cast<int>(StartColumn) +
+ (Break ? ContinuationIndent + static_cast<int>(Prefix.size()) // After a break: indent plus Prefix.
+ : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
+ : 0) +
+ (QuoteStyle == AtDoubleQuotes ? 2 : 1))); // No break: width of the opening `@"` or quote.
+}
+
+void BreakableStringLiteralUsingOperators::insertBreak(
+ unsigned LineIndex, unsigned TailOffset, Split Split,
+ unsigned ContentIndent, WhitespaceManager &Whitespaces) const { // LineIndex and ContentIndent are not used.
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
+ Split.first, // The leading 2 or 1 is the width of the opening `@"` or quote.
+ /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
+ /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
+ /*Spaces=*/ // Clamped at 0 because ContinuationIndent may be negative.
+ std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
+}
+
+void BreakableStringLiteralUsingOperators::updateAfterBroken(
+ WhitespaceManager &Whitespaces) const {
+ // Add the braces or parentheses around the broken token, if they are needed.
+ if (!BracesNeeded)
+ return;
+
+ // To add a brace or parenthesis, we replace the first character (the quote or
+ // the at sign) and the last quote with LeftBraceQuote and RightBraceQuote,
+ // which repeat the replaced character. The rest of the program requires one
+ // replacement per source range, so replacing the empty ranges around the
+ // string instead could conflict with whitespace replacements next to it.
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
+ /*Spaces=*/0);
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
+ /*PreviousPostfix=*/RightBraceQuote,
+ /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
+}
+
BreakableComment::BreakableComment(const FormatToken &Token,
unsigned StartColumn, bool InPPDirective,
encoding::Encoding Encoding,
diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h
index 190144a..e7c0680 100644
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@@ -230,6 +230,11 @@ public:
/// as a unit and is responsible for the formatting of the them.
virtual void updateNextToken(LineState &State) const {}
+ /// Adds replacements that are needed when the token is broken, such as
+ /// wrapping a JavaScript string in parentheses after it gets broken with plus
+ /// signs.
+ virtual void updateAfterBroken(WhitespaceManager &Whitespaces) const {}
+
protected:
BreakableToken(const FormatToken &Tok, bool InPPDirective,
encoding::Encoding Encoding, const FormatStyle &Style)
@@ -283,6 +288,45 @@ protected:
unsigned UnbreakableTailLength;
};
+class BreakableStringLiteralUsingOperators : public BreakableStringLiteral {
+public:
+ enum QuoteStyleType {
+ DoubleQuotes, // The string is quoted with double quotes.
+ SingleQuotes, // The JavaScript string is quoted with single quotes.
+ AtDoubleQuotes, // The C# verbatim string is quoted with the at sign and
+ // double quotes.
+ };
+ /// Creates a breakable token for a single line string literal for C#, Java,
+ /// JavaScript, or Verilog. \p QuoteStyle tells how \p Tok is quoted.
+ /// \p UnindentPlus permits unindenting a plus sign that starts a new line.
+ /// \p StartColumn specifies the column in which the token will start
+ /// after formatting.
+ BreakableStringLiteralUsingOperators(
+ const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
+ unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style);
+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
+ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ unsigned ContentIndent,
+ WhitespaceManager &Whitespaces) const override;
+ void updateAfterBroken(WhitespaceManager &Whitespaces) const override; // Adds the braces or parentheses.
+
+protected:
+ // Whether braces or parentheses should be inserted around the string to form
+ // a concatenation.
+ bool BracesNeeded;
+ QuoteStyleType QuoteStyle;
+ // The braces or parentheses along with the first character which they
+ // replace, either a quote or at sign.
+ StringRef LeftBraceQuote;
+ StringRef RightBraceQuote;
+ // Indent of the lines after the first, relative to the start column: the width
+ // of the added brace or parenthesis, or negative if the plus sign is unindented.
+ int ContinuationIndent;
+};
+
class BreakableComment : public BreakableToken {
protected:
/// Creates a breakable token for a comment.
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 386235d..3f15faf 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -36,6 +36,14 @@ static bool shouldIndentWrappedSelectorName(const FormatStyle &Style,
return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl;
}
+// Returns true if a binary operator following \p Tok may be unindented, i.e. the
+// token before \p Tok has assignment precedence, is `return`, or is a requires clause.
+static bool shouldUnindentNextOperator(const FormatToken &Tok) {
+ const FormatToken *Previous = Tok.getPreviousNonComment();
+ return Previous && (Previous->getPrecedence() == prec::Assignment ||
+ Previous->isOneOf(tok::kw_return, TT_RequiresClause));
+}
+
// Returns the length of everything up to the first possible line break after
// the ), ], } or > matching \c Tok.
static unsigned getLengthToMatchingParen(const FormatToken &Tok,
@@ -1618,11 +1626,10 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
if (Previous && Previous->endsSequence(tok::l_paren, tok::kw__Generic))
NewParenState.Indent = CurrentState.LastSpace;
- if (Previous &&
- (Previous->getPrecedence() == prec::Assignment ||
- Previous->isOneOf(tok::kw_return, TT_RequiresClause) ||
- (PrecedenceLevel == prec::Conditional && Previous->is(tok::question) &&
- Previous->is(TT_ConditionalExpr))) &&
+ if ((shouldUnindentNextOperator(Current) ||
+ (Previous &&
+ (PrecedenceLevel == prec::Conditional &&
+ Previous->is(tok::question) && Previous->is(TT_ConditionalExpr)))) &&
!Newline) {
// If BreakBeforeBinaryOperators is set, un-indent a bit to account for
// the operator and keep the operands aligned.
@@ -2186,14 +2193,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
LineState &State, bool AllowBreak) {
unsigned StartColumn = State.Column - Current.ColumnWidth;
if (Current.isStringLiteral()) {
- // FIXME: String literal breaking is currently disabled for C#, Java, Json
- // and JavaScript, as it requires strings to be merged using "+" which we
- // don't support.
- if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
- Style.isCSharp() || Style.isJson() || !Style.BreakStringLiterals ||
- !AllowBreak) {
+ // Strings in JSON can not be broken.
+ if (Style.isJson() || !Style.BreakStringLiterals || !AllowBreak)
return nullptr;
- }
// Don't break string literals inside preprocessor directives (except for
// #define directives, as their contents are stored in separate lines and
@@ -2212,6 +2214,33 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
return nullptr;
StringRef Text = Current.TokenText;
+ // We need this to address the case where there is an unbreakable tail only
+ // if certain other formatting decisions have been taken. The
+ // UnbreakableTailLength of Current is an overapproximation in that case and
+ // we need to be correct here.
+ unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
+ ? 0
+ : Current.UnbreakableTailLength;
+
+ if (Style.isVerilog() || Style.Language == FormatStyle::LK_Java ||
+ Style.isJavaScript() || Style.isCSharp()) {
+ BreakableStringLiteralUsingOperators::QuoteStyleType QuoteStyle;
+ if (Style.isJavaScript() && Text.startswith("'") && Text.endswith("'")) {
+ QuoteStyle = BreakableStringLiteralUsingOperators::SingleQuotes;
+ } else if (Style.isCSharp() && Text.startswith("@\"") &&
+ Text.endswith("\"")) {
+ QuoteStyle = BreakableStringLiteralUsingOperators::AtDoubleQuotes;
+ } else if (Text.startswith("\"") && Text.endswith("\"")) {
+ QuoteStyle = BreakableStringLiteralUsingOperators::DoubleQuotes;
+ } else {
+ return nullptr;
+ }
+ return std::make_unique<BreakableStringLiteralUsingOperators>(
+ Current, QuoteStyle,
+ /*UnindentPlus=*/shouldUnindentNextOperator(Current), StartColumn,
+ UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style);
+ }
+
StringRef Prefix;
StringRef Postfix;
// FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
@@ -2224,13 +2253,6 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
Text.startswith(Prefix = "u8\"") ||
Text.startswith(Prefix = "L\""))) ||
(Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
- // We need this to address the case where there is an unbreakable tail
- // only if certain other formatting decisions have been taken. The
- // UnbreakableTailLength of Current is an overapproximation is that case
- // and we need to be correct here.
- unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
- ? 0
- : Current.UnbreakableTailLength;
return std::make_unique<BreakableStringLiteral>(
Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
State.Line->InPPDirective, Encoding, Style);
@@ -2631,6 +2653,9 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Current.UnbreakableTailLength;
if (BreakInserted) {
+ if (!DryRun)
+ Token->updateAfterBroken(Whitespaces);
+
// If we break the token inside a parameter list, we need to break before
// the next parameter on all levels, so that the next parameter is clearly
// visible. Line comments already introduce a break.
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index f978ade..1ea7ce6 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -134,6 +134,11 @@ namespace format {
TYPE(StartOfName) \
TYPE(StatementAttributeLikeMacro) \
TYPE(StatementMacro) \
+ /* A string that is part of a string concatenation. For C#, JavaScript, and \
+ * Java, it is used for marking whether a string needs parentheses around it \
+ * if it is to be split into parts joined by `+`. For Verilog, whether \
+ * braces need to be added to split it. Not used for other languages. */ \
+ TYPE(StringInConcatenation) \
TYPE(StructLBrace) \
TYPE(StructuredBindingLSquare) \
TYPE(TemplateCloser) \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 500dc34..ea81141 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -863,6 +863,11 @@ private:
OpeningBrace.Previous->is(TT_JsTypeColon)) {
Contexts.back().IsExpression = false;
}
+ if (Style.isVerilog() &&
+ (!OpeningBrace.getPreviousNonComment() ||
+ OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
+ Contexts.back().VerilogMayBeConcatenation = true;
+ }
unsigned CommaCount = 0;
while (CurrentToken) {
@@ -1737,6 +1742,9 @@ private:
bool InCpp11AttributeSpecifier = false;
bool InCSharpAttributeSpecifier = false;
bool VerilogAssignmentFound = false;
+ // Whether the braces may mean concatenation instead of structure or array
+ // literal.
+ bool VerilogMayBeConcatenation = false;
enum {
Unknown,
// Like the part after `:` in a constructor.
@@ -2070,6 +2078,14 @@ private:
} else {
Current.setType(TT_LineComment);
}
+ } else if (Current.is(tok::string_literal)) {
+ if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
+ Current.getPreviousNonComment() &&
+ Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
+ Current.getNextNonComment() &&
+ Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
+ Current.setType(TT_StringInConcatenation);
+ }
} else if (Current.is(tok::l_paren)) {
if (lParenStartsCppCast(Current))
Current.setType(TT_CppCastLParen);
@@ -2740,6 +2756,19 @@ public:
Start = Current;
}
+ if ((Style.isCSharp() || Style.isJavaScript() ||
+ Style.Language == FormatStyle::LK_Java) &&
+ Precedence == prec::Additive && Current) {
+ // A string can be broken without parentheses around it when it is
+ // already in a sequence of strings joined by `+` signs.
+ FormatToken *Prev = Current->getPreviousNonComment();
+ if (Prev && Prev->is(tok::string_literal) &&
+ (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
+ TT_StringInConcatenation))) {
+ Prev->setType(TT_StringInConcatenation);
+ }
+ }
+
// At the end of the line or when an operator with lower precedence is
// found, insert fake parenthesis and return.
if (!Current ||
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index a27c152..b7bd8d2 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -22,8 +22,13 @@ namespace format {
bool WhitespaceManager::Change::IsBeforeInFile::operator()(
const Change &C1, const Change &C2) const {
return SourceMgr.isBeforeInTranslationUnit(
- C1.OriginalWhitespaceRange.getBegin(),
- C2.OriginalWhitespaceRange.getBegin());
+ C1.OriginalWhitespaceRange.getBegin(),
+ C2.OriginalWhitespaceRange.getBegin()) || // Order by the start of the range first.
+ (C1.OriginalWhitespaceRange.getBegin() ==
+ C2.OriginalWhitespaceRange.getBegin() && // When the starts coincide, order by the end.
+ SourceMgr.isBeforeInTranslationUnit(
+ C1.OriginalWhitespaceRange.getEnd(),
+ C2.OriginalWhitespaceRange.getEnd()));
}
WhitespaceManager::Change::Change(const FormatToken &Tok,
@@ -1516,10 +1521,55 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) {
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
- if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
- // Do not generate two replacements for the same location.
- continue;
+ if (i > 0) {
+ auto Last = Changes[i - 1].OriginalWhitespaceRange;
+ auto New = Changes[i].OriginalWhitespaceRange;
+ // Do not generate two replacements for the same location. As a special
+ // case, it is allowed if there is a replacement for the empty range
+ // between 2 tokens and another non-empty range at the start of the second
+ // token. We didn't implement logic to combine replacements for 2
+ // consecutive source ranges into a single replacement, because the
+ // program works fine without it.
+ //
+ // We can't eliminate empty original whitespace ranges. They appear when
+ // 2 tokens have no whitespace in between in the input. It does not
+ // matter whether whitespace is to be added. If no whitespace is to be
+ // added, the replacement will be empty, and it gets eliminated after this
+ // step in storeReplacement. For example, if the input is `foo();`,
+ // there will be a replacement for the range between every consecutive
+ // pair of tokens.
+ //
+ // A replacement at the start of a token can be added by
+ // BreakableStringLiteralUsingOperators::updateAfterBroken when it adds
+ // braces around the string literal. Say Verilog code is being formatted and
+ // the first line is to become the next 2 lines.
+ // x("long string");
+ // x({"long ",
+ // "string"});
+ // There will be a replacement for the empty range between the parenthesis
+ // and the string and another replacement for the quote character. The
+ // replacement for the empty range between the parenthesis and the quote
+ // comes from ContinuationIndenter::addTokenOnCurrentLine when it changes
+ // the original empty range between the parenthesis and the string to
+ // another empty one. The replacement for the quote character comes from
+ // BreakableStringLiteralUsingOperators::updateAfterBroken when it adds
+ // the brace. In the example, the replacement for the empty range is the
+ // same as the original text. However, eliminating replacements that are
+ // the same as the original does not help in general. For example, a newline
+ // can be inserted, causing the first line to become the next 3 lines.
+ // xxxxxxxxxxx("long string");
+ // xxxxxxxxxxx(
+ // {"long ",
+ // "string"});
+ // In that case, the empty range between the parenthesis and the string
+ // will be replaced by a newline and 4 spaces. So we will still have to
+ // deal with a replacement for an empty source range followed by a
+ // replacement for a non-empty source range.
+ if (Last.getBegin() == New.getBegin() &&
+ (Last.getEnd() != Last.getBegin() ||
+ New.getEnd() == New.getBegin())) {
+ continue;
+ }
}
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;