aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Format/WhitespaceManager.cpp
diff options
context:
space:
mode:
authorsstwcw <su3e8a96kzlver@posteo.net>2023-06-29 15:23:36 +0000
committersstwcw <su3e8a96kzlver@posteo.net>2023-09-05 03:19:49 +0000
commitddc80637ccbc5be26ae40f01841c6019a38d1955 (patch)
treede973a69e1871bdc0e0193f138606652f6b4b507 /clang/lib/Format/WhitespaceManager.cpp
parentef5217b3c0dcbb58927fe43400b6d1faa677bf98 (diff)
downloadllvm-ddc80637ccbc5be26ae40f01841c6019a38d1955.zip
llvm-ddc80637ccbc5be26ae40f01841c6019a38d1955.tar.gz
llvm-ddc80637ccbc5be26ae40f01841c6019a38d1955.tar.bz2
[clang-format] Break long string literals in C#, etc.
Now strings that are too long for one line in C#, Java, JavaScript, and Verilog get broken into several lines. C# and JavaScript interpolated strings are not broken. A new subclass BreakableStringLiteralUsingOperators is used to handle the logic for adding plus signs and commas. The updateAfterBroken method was added because now parentheses or braces may be required after the plus signs or commas are added. In order to decide whether the added plus sign should be unindented in the BreakableToken object, the logic for it is taken out into a separate function shouldUnindentNextOperator. The logic for finding the continuation indentation when the option AlignAfterOpenBracket is set to DontAlign is not implemented yet. So in that case the new line may have the wrong indentation, and the parts may have the wrong length if the string needs to be broken more than once because finding where to break the string depends on where the string starts. The preambles for the C# and Java unit tests are changed to the newer style in order to allow the 3-argument verifyFormat macro. Some cases are changed from verifyFormat to verifyIncompleteFormat because those use incomplete code and the new verifyFormat function checks that the code is complete. The line in the doc was changed to be indented by 4 spaces, that is, the default continuation indentation. The formatter has always indented it that way. It was probably a mistake that the doc showed 2 spaces previously. This commit was first committed as 16ccba51072b. The tests caused assertion failures. Then it was reverted in 547bce36132a. Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D154093
Diffstat (limited to 'clang/lib/Format/WhitespaceManager.cpp')
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp62
1 files changed, 56 insertions, 6 deletions
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index a27c152..b7bd8d2 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -22,8 +22,13 @@ namespace format {
bool WhitespaceManager::Change::IsBeforeInFile::operator()(
const Change &C1, const Change &C2) const {
return SourceMgr.isBeforeInTranslationUnit(
- C1.OriginalWhitespaceRange.getBegin(),
- C2.OriginalWhitespaceRange.getBegin());
+ C1.OriginalWhitespaceRange.getBegin(),
+ C2.OriginalWhitespaceRange.getBegin()) ||
+ (C1.OriginalWhitespaceRange.getBegin() ==
+ C2.OriginalWhitespaceRange.getBegin() &&
+ SourceMgr.isBeforeInTranslationUnit(
+ C1.OriginalWhitespaceRange.getEnd(),
+ C2.OriginalWhitespaceRange.getEnd()));
}
WhitespaceManager::Change::Change(const FormatToken &Tok,
@@ -1516,10 +1521,55 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) {
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
- if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
- // Do not generate two replacements for the same location.
- continue;
+ if (i > 0) {
+ auto Last = Changes[i - 1].OriginalWhitespaceRange;
+ auto New = Changes[i].OriginalWhitespaceRange;
+ // Do not generate two replacements for the same location. As a special
+ // case, it is allowed if there is a replacement for the empty range
+ // between 2 tokens and another non-empty range at the start of the second
+ // token. We didn't implement logic to combine replacements for 2
+ // consecutive source ranges into a single replacement, because the
+ // program works fine without it.
+ //
+ // We can't eliminate empty original whitespace ranges. They appear when
+ // 2 tokens have no whitespace in between in the input. It does not
+ // matter whether whitespace is to be added. If no whitespace is to be
+ // added, the replacement will be empty, and it gets eliminated after this
+ // step in storeReplacement. For example, if the input is `foo();`,
+ // there will be a replacement for the range between every consecutive
+ // pair of tokens.
+ //
+ // A replacement at the start of a token can be added by
+ // BreakableStringLiteralUsingOperators::insertBreak when it adds braces
+ // around the string literal. Say Verilog code is being formatted and the
+ // first line is to become the next 2 lines.
+ // x("long string");
+ // x({"long ",
+ // "string"});
+ // There will be a replacement for the empty range between the parenthesis
+ // and the string and another replacement for the quote character. The
+ // replacement for the empty range between the parenthesis and the quote
+ // comes from ContinuationIndenter::addTokenOnCurrentLine when it changes
+ // the original empty range between the parenthesis and the string to
+ // another empty one. The replacement for the quote character comes from
+ // BreakableStringLiteralUsingOperators::insertBreak when it adds the
+ // brace. In the example, the replacement for the empty range is the same
+ // as the original text. However, eliminating replacements that are same
+ // as the original does not help in general. For example, a newline can
+ // be inserted, causing the first line to become the next 3 lines.
+ // xxxxxxxxxxx("long string");
+ // xxxxxxxxxxx(
+ // {"long ",
+ // "string"});
+ // In that case, the empty range between the parenthesis and the string
+ // will be replaced by a newline and 4 spaces. So we will still have to
+ // deal with a replacement for an empty source range followed by a
+ // replacement for a non-empty source range.
+ if (Last.getBegin() == New.getBegin() &&
+ (Last.getEnd() != Last.getBegin() ||
+ New.getEnd() == New.getBegin())) {
+ continue;
+ }
}
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;