aboutsummaryrefslogtreecommitdiff
path: root/flang/lib/Parser/preprocessor.cpp
diff options
context:
space:
mode:
author Peter Klausler <pklausler@nvidia.com> 2023-07-13 16:26:23 -0700
committer Peter Klausler <pklausler@nvidia.com> 2023-07-31 14:22:43 -0700
commit 6fac3f7b2e9415d181b8d21af57f4e6a312385a5 (patch)
tree 8fe14fe75aef2f77a18c24186dbc6165d086640d /flang/lib/Parser/preprocessor.cpp
parent 299ec3c22a3b4bdfa61502c1485b9dda9db1cf59 (diff)
downloadllvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.zip
llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.gz
llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.bz2
[flang] Stricter "implicit continuation" in preprocessing
The prescanner performs implicit line continuation when it looks like the parenthesized arguments of a call to a function-like macro may span multiple lines. In an attempt to work more like a Fortran-oblivious C preprocessor, the prescanner will act as if the following lines had been continuations so that the function-like macro could be invoked. This still seems like a good idea, but a recent bug report on LLVM's GitHub issue tracker shows one way in which it could trigger inadvertently and mess up a program. So this patch makes the conditions for implicit line continuation much more strict. First, the leading parenthesis has to have been preceded by an identifier that's known to be a macro name. (It doesn't have to be a function-like macro, since it's possible for a keyword-like macro to expand to the name of a function-like macro.) Second, no macro definition can ever have had unbalanced parentheses in its replacement text. Also cleans up some parenthesis recognition code to fix some issues found in testing, so that a token with leading or trailing spaces can still be recognized as a parenthesis or comma. Fixes https://github.com/llvm/llvm-project/issues/63844. Differential Revision: https://reviews.llvm.org/D155499
Diffstat (limited to 'flang/lib/Parser/preprocessor.cpp')
-rw-r--r-- flang/lib/Parser/preprocessor.cpp 88
1 files changed, 56 insertions, 32 deletions
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 9197906..d755605 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -147,12 +147,14 @@ TokenSequence Definition::Apply(
CharBlock token{replacement_.TokenAt(j)};
std::size_t bytes{token.size()};
if (skipping) {
- if (bytes == 1) {
- if (token[0] == '(') {
- ++parenthesesNesting;
- } else if (token[0] == ')') {
- skipping = --parenthesesNesting > 0;
+ char ch{token.OnlyNonBlank()};
+ if (ch == '(') {
+ ++parenthesesNesting;
+ } else if (ch == ')') {
+ if (parenthesesNesting > 0) {
+ --parenthesesNesting;
}
+ skipping = parenthesesNesting > 0;
}
continue;
}
@@ -207,18 +209,21 @@ TokenSequence Definition::Apply(
result.Put(args[k]);
}
} else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
- j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
+ j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
parenthesesNesting == 0) {
parenthesesNesting = 1;
skipping = args.size() == argumentCount_;
++j;
} else {
- if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
- ++parenthesesNesting;
- } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
- if (--parenthesesNesting == 0) {
- skipping = false;
- continue;
+ if (parenthesesNesting > 0) {
+ char ch{token.OnlyNonBlank()};
+ if (ch == '(') {
+ ++parenthesesNesting;
+ } else if (ch == ')') {
+ if (--parenthesesNesting == 0) {
+ skipping = false;
+ continue;
+ }
}
}
result.Put(replacement_, j);
@@ -361,18 +366,16 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
std::vector<std::size_t> argStart{++k};
for (int nesting{0}; k < tokens; ++k) {
CharBlock token{input.TokenAt(k)};
- if (token.size() == 1) {
- char ch{token[0]};
- if (ch == '(') {
- ++nesting;
- } else if (ch == ')') {
- if (nesting == 0) {
- break;
- }
- --nesting;
- } else if (ch == ',' && nesting == 0) {
- argStart.push_back(k + 1);
+ char ch{token.OnlyNonBlank()};
+ if (ch == '(') {
+ ++nesting;
+ } else if (ch == ')') {
+ if (nesting == 0) {
+ break;
}
+ --nesting;
+ } else if (ch == ',' && nesting == 0) {
+ argStart.push_back(k + 1);
}
}
if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
@@ -454,12 +457,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
nameToken = SaveTokenAsName(nameToken);
definitions_.erase(nameToken);
- if (++j < tokens && dir.TokenAt(j).size() == 1 &&
- dir.TokenAt(j)[0] == '(') {
+ if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') {
j = dir.SkipBlanks(j + 1);
std::vector<std::string> argName;
bool isVariadic{false};
- if (dir.TokenAt(j).ToString() != ")") {
+ if (dir.TokenAt(j).OnlyNonBlank() != ')') {
while (true) {
std::string an{dir.TokenAt(j).ToString()};
if (an == "...") {
@@ -478,11 +480,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
"#define: malformed argument list"_err_en_US);
return;
}
- std::string punc{dir.TokenAt(j).ToString()};
- if (punc == ")") {
+ char punc{dir.TokenAt(j).OnlyNonBlank()};
+ if (punc == ')') {
break;
}
- if (isVariadic || punc != ",") {
+ if (isVariadic || punc != ',') {
prescanner.Say(dir.GetTokenProvenanceRange(j),
"#define: malformed argument list"_err_en_US);
return;
@@ -502,10 +504,12 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
}
j = dir.SkipBlanks(j + 1);
+ CheckForUnbalancedParentheses(dir, j, tokens - j);
definitions_.emplace(std::make_pair(
nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
} else {
j = dir.SkipBlanks(j + 1);
+ CheckForUnbalancedParentheses(dir, j, tokens - j);
definitions_.emplace(
std::make_pair(nameToken, Definition{dir, j, tokens - j}));
}
@@ -883,7 +887,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
}
switch (op) {
case PARENS:
- if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
+ if (*atToken < tokens && token.TokenAt(*atToken).OnlyNonBlank() == ')') {
++*atToken;
break;
}
@@ -1085,8 +1089,8 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
CharBlock name;
if (j + 3 < expr1.SizeInTokens() &&
- expr1.TokenAt(j + 1).ToString() == "(" &&
- expr1.TokenAt(j + 3).ToString() == ")") {
+ expr1.TokenAt(j + 1).OnlyNonBlank() == '(' &&
+ expr1.TokenAt(j + 3).OnlyNonBlank() == ')') {
name = expr1.TokenAt(j + 2);
j += 3;
} else if (j + 1 < expr1.SizeInTokens() &&
@@ -1176,4 +1180,24 @@ void Preprocessor::LineDirective(
sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber);
}
}
+
+void Preprocessor::CheckForUnbalancedParentheses( // Notes on the preprocessor whether a scanned token range has unbalanced parentheses; the #define handling calls it on a macro's replacement text.
+ const TokenSequence &tokens, std::size_t j, std::size_t n) { // Examines n tokens of `tokens`, starting at token index j.
+ if (!anyMacroWithUnbalancedParentheses_) { // Once the flag is set there is nothing further to record, so the scan is skipped.
+ int nesting{0}; // Count of '(' seen that have not yet been matched by a ')'.
+ for (; n-- > 0; ++j) {
+ char ch{tokens.TokenAt(j).OnlyNonBlank()}; // Presumably the token's sole non-blank character, so a padded "(" or ")" still matches -- confirm against CharBlock.
+ if (ch == '(') {
+ ++nesting;
+ } else if (ch == ')') {
+ if (nesting-- == 0) { // A ')' with no open '(': nesting drops to -1 and the scan stops early.
+ break;
+ }
+ }
+ }
+ if (nesting != 0) { // Positive means an unclosed '('; negative means a stray ')'.
+ anyMacroWithUnbalancedParentheses_ = true;
+ }
+ }
+}
} // namespace Fortran::parser