diff options
Diffstat (limited to 'clang/lib/Format')
-rw-r--r-- | clang/lib/Format/BreakableToken.cpp | 114 | ||||
-rw-r--r-- | clang/lib/Format/BreakableToken.h | 44 | ||||
-rw-r--r-- | clang/lib/Format/ContinuationIndenter.cpp | 63 | ||||
-rw-r--r-- | clang/lib/Format/FormatToken.h | 5 | ||||
-rw-r--r-- | clang/lib/Format/TokenAnnotator.cpp | 29 | ||||
-rw-r--r-- | clang/lib/Format/WhitespaceManager.cpp | 62 |
6 files changed, 292 insertions, 25 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index af1e074..954eeb9 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -292,6 +292,120 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex, Prefix, InPPDirective, 1, StartColumn); } +BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators( + const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, + unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableStringLiteral( + Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'" + : QuoteStyle == AtDoubleQuotes ? "@\"" + : "\"", + /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"", + UnbreakableTailLength, InPPDirective, Encoding, Style), + BracesNeeded(Tok.isNot(TT_StringInConcatenation)), + QuoteStyle(QuoteStyle) { + // Find the replacement text for inserting braces and quotes and line breaks. + // We don't create an allocated string concatenated from parts here because it + // has to outlive the BreakableStringliteral object. The brace replacements + // include a quote so that WhitespaceManager can tell it apart from whitespace + // replacements between the string and surrounding tokens. + + // The option is not implemented in JavaScript. + bool SignOnNewLine = + !Style.isJavaScript() && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; + + if (Style.isVerilog()) { + // In Verilog, all strings are quoted by double quotes, joined by commas, + // and wrapped in braces. The comma is always before the newline. + assert(QuoteStyle == DoubleQuotes); + LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \""; + RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }"; + Postfix = "\","; + Prefix = "\""; + } else { + // The plus sign may be on either line. And also C# and JavaScript have + // several quoting styles. + if (QuoteStyle == SingleQuotes) { + LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('"; + RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')"; + Postfix = SignOnNewLine ? "'" : "' +"; + Prefix = SignOnNewLine ? "+ '" : "'"; + } else { + if (QuoteStyle == AtDoubleQuotes) { + LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@"; + Prefix = SignOnNewLine ? "+ @\"" : "@\""; + } else { + LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\""; + Prefix = SignOnNewLine ? "+ \"" : "\""; + } + RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")"; + Postfix = SignOnNewLine ? "\"" : "\" +"; + } + } + + // Following lines are indented by the width of the brace and space if any. + ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0; + // The plus sign may need to be unindented depending on the style. + // FIXME: Add support for DontAlign. + if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus && + Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) { + ContinuationIndent -= 2; + } +} + +unsigned BreakableStringLiteralUsingOperators::getRemainingLength( + unsigned LineIndex, unsigned Offset, unsigned StartColumn) const { + return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) + + encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, + Style.TabWidth, Encoding); +} + +unsigned +BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex, + bool Break) const { + return std::max( + 0, + static_cast<int>(StartColumn) + + (Break ? ContinuationIndent + static_cast<int>(Prefix.size()) + : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1 + : 0) + + (QuoteStyle == AtDoubleQuotes ? 2 : 1))); +} + +void BreakableStringLiteralUsingOperators::insertBreak( + unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const { + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset + + Split.first, + /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix, + /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1, + /*Spaces=*/ + std::max(0, static_cast<int>(StartColumn) + ContinuationIndent)); +} + +void BreakableStringLiteralUsingOperators::updateAfterBroken( + WhitespaceManager &Whitespaces) const { + // Add the braces required for breaking the token if they are needed. + if (!BracesNeeded) + return; + + // To add a brace or parenthesis, we replace the quote (or the at sign) with a + // brace and another quote. This is because the rest of the program requires + // one replacement for each source range. If we replace the empty strings + // around the string, it may conflict with whitespace replacements between the + // string and adjacent tokens. + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0, + /*Spaces=*/0); + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1, + /*PreviousPostfix=*/RightBraceQuote, + /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0); +} + BreakableComment::BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h index 190144a..e7c0680 100644 --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -230,6 +230,11 @@ public: /// as a unit and is responsible for the formatting of the them. virtual void updateNextToken(LineState &State) const {} + /// Adds replacements that are needed when the token is broken. Such as + /// wrapping a JavaScript string in parentheses after it gets broken with plus + /// signs. + virtual void updateAfterBroken(WhitespaceManager &Whitespaces) const {} + protected: BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) @@ -283,6 +288,45 @@ protected: unsigned UnbreakableTailLength; }; +class BreakableStringLiteralUsingOperators : public BreakableStringLiteral { +public: + enum QuoteStyleType { + DoubleQuotes, // The string is quoted with double quotes. + SingleQuotes, // The JavaScript string is quoted with single quotes. + AtDoubleQuotes, // The C# verbatim string is quoted with the at sign and + // double quotes. + }; + /// Creates a breakable token for a single line string literal for C#, Java, + /// JavaScript, or Verilog. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableStringLiteralUsingOperators( + const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, + unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); + unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, + WhitespaceManager &Whitespaces) const override; + void updateAfterBroken(WhitespaceManager &Whitespaces) const override; + +protected: + // Whether braces or parentheses should be inserted around the string to form + // a concatenation. + bool BracesNeeded; + QuoteStyleType QuoteStyle; + // The braces or parentheses along with the first character which they + // replace, either a quote or at sign. + StringRef LeftBraceQuote; + StringRef RightBraceQuote; + // Width added to the left due to the added brace or parenthesis. Does not + // apply to the first line. + int ContinuationIndent; +}; + class BreakableComment : public BreakableToken { protected: /// Creates a breakable token for a comment. diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 386235d..3f15faf 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -36,6 +36,14 @@ static bool shouldIndentWrappedSelectorName(const FormatStyle &Style, return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl; } +// Returns true if a binary operator following \p Tok should be unindented when +// the style permits it. +static bool shouldUnindentNextOperator(const FormatToken &Tok) { + const FormatToken *Previous = Tok.getPreviousNonComment(); + return Previous && (Previous->getPrecedence() == prec::Assignment || + Previous->isOneOf(tok::kw_return, TT_RequiresClause)); +} + // Returns the length of everything up to the first possible line break after // the ), ], } or > matching \c Tok. static unsigned getLengthToMatchingParen(const FormatToken &Tok, @@ -1618,11 +1626,10 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, if (Previous && Previous->endsSequence(tok::l_paren, tok::kw__Generic)) NewParenState.Indent = CurrentState.LastSpace; - if (Previous && - (Previous->getPrecedence() == prec::Assignment || - Previous->isOneOf(tok::kw_return, TT_RequiresClause) || - (PrecedenceLevel == prec::Conditional && Previous->is(tok::question) && - Previous->is(TT_ConditionalExpr))) && + if ((shouldUnindentNextOperator(Current) || + (Previous && + (PrecedenceLevel == prec::Conditional && + Previous->is(tok::question) && Previous->is(TT_ConditionalExpr)))) && !Newline) { // If BreakBeforeBinaryOperators is set, un-indent a bit to account for // the operator and keep the operands aligned. @@ -2186,14 +2193,9 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for C#, Java, Json - // and JavaScript, as it requires strings to be merged using "+" which we - // don't support. - if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || - Style.isCSharp() || Style.isJson() || !Style.BreakStringLiterals || - !AllowBreak) { + // Strings in JSON can not be broken. + if (Style.isJson() || !Style.BreakStringLiterals || !AllowBreak) return nullptr; - } // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and @@ -2212,6 +2214,33 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, return nullptr; StringRef Text = Current.TokenText; + // We need this to address the case where there is an unbreakable tail only + // if certain other formatting decisions have been taken. The + // UnbreakableTailLength of Current is an overapproximation in that case and + // we need to be correct here. + unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) + ? 0 + : Current.UnbreakableTailLength; + + if (Style.isVerilog() || Style.Language == FormatStyle::LK_Java || + Style.isJavaScript() || Style.isCSharp()) { + BreakableStringLiteralUsingOperators::QuoteStyleType QuoteStyle; + if (Style.isJavaScript() && Text.startswith("'") && Text.endswith("'")) { + QuoteStyle = BreakableStringLiteralUsingOperators::SingleQuotes; + } else if (Style.isCSharp() && Text.startswith("@\"") && + Text.endswith("\"")) { + QuoteStyle = BreakableStringLiteralUsingOperators::AtDoubleQuotes; + } else if (Text.startswith("\"") && Text.endswith("\"")) { + QuoteStyle = BreakableStringLiteralUsingOperators::DoubleQuotes; + } else { + return nullptr; + } + return std::make_unique<BreakableStringLiteralUsingOperators>( + Current, QuoteStyle, + /*UnindentPlus=*/shouldUnindentNextOperator(Current), StartColumn, + UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); + } + StringRef Prefix; StringRef Postfix; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. @@ -2224,13 +2253,6 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - // We need this to address the case where there is an unbreakable tail - // only if certain other formatting decisions have been taken. The - // UnbreakableTailLength of Current is an overapproximation is that case - // and we need to be correct here. - unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) - ? 0 - : Current.UnbreakableTailLength; return std::make_unique<BreakableStringLiteral>( Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); @@ -2631,6 +2653,9 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Current.UnbreakableTailLength; if (BreakInserted) { + if (!DryRun) + Token->updateAfterBroken(Whitespaces); + // If we break the token inside a parameter list, we need to break before // the next parameter on all levels, so that the next parameter is clearly // visible. Line comments already introduce a break. diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index f978ade..1ea7ce6 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -134,6 +134,11 @@ namespace format { TYPE(StartOfName) \ TYPE(StatementAttributeLikeMacro) \ TYPE(StatementMacro) \ + /* A string that is part of a string concatenation. For C#, JavaScript, and \ + * Java, it is used for marking whether a string needs parentheses around it \ + * if it is to be split into parts joined by `+`. For Verilog, whether \ + * braces need to be added to split it. Not used for other languages. */ \ + TYPE(StringInConcatenation) \ TYPE(StructLBrace) \ TYPE(StructuredBindingLSquare) \ TYPE(TemplateCloser) \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 500dc34..ea81141 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -863,6 +863,11 @@ private: OpeningBrace.Previous->is(TT_JsTypeColon)) { Contexts.back().IsExpression = false; } + if (Style.isVerilog() && + (!OpeningBrace.getPreviousNonComment() || + OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) { + Contexts.back().VerilogMayBeConcatenation = true; + } unsigned CommaCount = 0; while (CurrentToken) { @@ -1737,6 +1742,9 @@ private: bool InCpp11AttributeSpecifier = false; bool InCSharpAttributeSpecifier = false; bool VerilogAssignmentFound = false; + // Whether the braces may mean concatenation instead of structure or array + // literal. + bool VerilogMayBeConcatenation = false; enum { Unknown, // Like the part after `:` in a constructor. @@ -2070,6 +2078,14 @@ private: } else { Current.setType(TT_LineComment); } + } else if (Current.is(tok::string_literal)) { + if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation && + Current.getPreviousNonComment() && + Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) && + Current.getNextNonComment() && + Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) { + Current.setType(TT_StringInConcatenation); + } } else if (Current.is(tok::l_paren)) { if (lParenStartsCppCast(Current)) Current.setType(TT_CppCastLParen); @@ -2740,6 +2756,19 @@ public: Start = Current; } + if ((Style.isCSharp() || Style.isJavaScript() || + Style.Language == FormatStyle::LK_Java) && + Precedence == prec::Additive && Current) { + // A string can be broken without parentheses around it when it is + // already in a sequence of strings joined by `+` signs. + FormatToken *Prev = Current->getPreviousNonComment(); + if (Prev && Prev->is(tok::string_literal) && + (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus, + TT_StringInConcatenation))) { + Prev->setType(TT_StringInConcatenation); + } + } + // At the end of the line or when an operator with lower precedence is // found, insert fake parenthesis and return. if (!Current || diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index a27c152..b7bd8d2 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -22,8 +22,13 @@ namespace format { bool WhitespaceManager::Change::IsBeforeInFile::operator()( const Change &C1, const Change &C2) const { return SourceMgr.isBeforeInTranslationUnit( - C1.OriginalWhitespaceRange.getBegin(), - C2.OriginalWhitespaceRange.getBegin()); + C1.OriginalWhitespaceRange.getBegin(), + C2.OriginalWhitespaceRange.getBegin()) || + (C1.OriginalWhitespaceRange.getBegin() == + C2.OriginalWhitespaceRange.getBegin() && + SourceMgr.isBeforeInTranslationUnit( + C1.OriginalWhitespaceRange.getEnd(), + C2.OriginalWhitespaceRange.getEnd())); } WhitespaceManager::Change::Change(const FormatToken &Tok, @@ -1516,10 +1521,55 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) { void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; - if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() == - C.OriginalWhitespaceRange.getBegin()) { - // Do not generate two replacements for the same location. - continue; + if (i > 0) { + auto Last = Changes[i - 1].OriginalWhitespaceRange; + auto New = Changes[i].OriginalWhitespaceRange; + // Do not generate two replacements for the same location. As a special + // case, it is allowed if there is a replacement for the empty range + // between 2 tokens and another non-empty range at the start of the second + // token. We didn't implement logic to combine replacements for 2 + // consecutive source ranges into a single replacement, because the + // program works fine without it. + // + // We can't eliminate empty original whitespace ranges. They appear when + // 2 tokens have no whitespace in between in the input. It does not + // matter whether whitespace is to be added. If no whitespace is to be + // added, the replacement will be empty, and it gets eliminated after this + // step in storeReplacement. For example, if the input is `foo();`, + // there will be a replacement for the range between every consecutive + // pair of tokens. + // + // A replacement at the start of a token can be added by + // BreakableStringLiteralUsingOperators::insertBreak when it adds braces + // around the string literal. Say Verilog code is being formatted and the + // first line is to become the next 2 lines. + // x("long string"); + // x({"long ", + // "string"}); + // There will be a replacement for the empty range between the parenthesis + // and the string and another replacement for the quote character. The + // replacement for the empty range between the parenthesis and the quote + // comes from ContinuationIndenter::addTokenOnCurrentLine when it changes + // the original empty range between the parenthesis and the string to + // another empty one. The replacement for the quote character comes from + // BreakableStringLiteralUsingOperators::insertBreak when it adds the + // brace. In the example, the replacement for the empty range is the same + // as the original text. However, eliminating replacements that are same + // as the original does not help in general. For example, a newline can + // be inserted, causing the first line to become the next 3 lines. + // xxxxxxxxxxx("long string"); + // xxxxxxxxxxx( + // {"long ", + // "string"}); + // In that case, the empty range between the parenthesis and the string + // will be replaced by a newline and 4 spaces. So we will still have to + // deal with a replacement for an empty source range followed by a + // replacement for a non-empty source range. + if (Last.getBegin() == New.getBegin() && + (Last.getEnd() != Last.getBegin() || + New.getEnd() == New.getBegin())) { + continue; + } } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; |