diff options
author | Peter Klausler <pklausler@nvidia.com> | 2023-07-13 16:26:23 -0700 |
---|---|---|
committer | Peter Klausler <pklausler@nvidia.com> | 2023-07-31 14:22:43 -0700 |
commit | 6fac3f7b2e9415d181b8d21af57f4e6a312385a5 (patch) | |
tree | 8fe14fe75aef2f77a18c24186dbc6165d086640d /flang/lib/Parser/preprocessor.cpp | |
parent | 299ec3c22a3b4bdfa61502c1485b9dda9db1cf59 (diff) | |
download | llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.zip llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.gz llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.bz2 |
[flang] Stricter "implicit continuation" in preprocessing
The prescanner performs implicit line continuation when it looks
like the parenthesized arguments of a call to a function-like macro
may span multiple lines. In an attempt to work more like a
Fortran-oblivious C preprocessor, the prescanner will act as if
the following lines had been continuations so that the function-like
macro could be invoked.
This still seems like a good idea, but a recent bug report on
LLVM's GitHub issue tracker shows one way in which it could trigger
inadvertently and mess up a program. So this patch makes the
conditions for implicit line continuation much stricter.
First, the leading parenthesis has to have been preceded by an
identifier that's known to be a macro name. (It doesn't have to
be a function-like macro, since it's possible for a keyword-like
macro to expand to the name of a function-like macro.) Second,
no macro definition encountered so far may have had unbalanced
parentheses in its replacement text.
Also cleans up some parenthesis recognition code to fix some
issues found in testing, so that a token with leading or trailing
spaces can still be recognized as a parenthesis or comma.
Fixes https://github.com/llvm/llvm-project/issues/63844.
Differential Revision: https://reviews.llvm.org/D155499
Diffstat (limited to 'flang/lib/Parser/preprocessor.cpp')
-rw-r--r-- | flang/lib/Parser/preprocessor.cpp | 88 |
1 files changed, 56 insertions, 32 deletions
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 9197906..d755605 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -147,12 +147,14 @@ TokenSequence Definition::Apply( CharBlock token{replacement_.TokenAt(j)}; std::size_t bytes{token.size()}; if (skipping) { - if (bytes == 1) { - if (token[0] == '(') { - ++parenthesesNesting; - } else if (token[0] == ')') { - skipping = --parenthesesNesting > 0; + char ch{token.OnlyNonBlank()}; + if (ch == '(') { + ++parenthesesNesting; + } else if (ch == ')') { + if (parenthesesNesting > 0) { + --parenthesesNesting; } + skipping = parenthesesNesting > 0; } continue; } @@ -207,18 +209,21 @@ TokenSequence Definition::Apply( result.Put(args[k]); } } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" && - j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" && + j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' && parenthesesNesting == 0) { parenthesesNesting = 1; skipping = args.size() == argumentCount_; ++j; } else { - if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') { - ++parenthesesNesting; - } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') { - if (--parenthesesNesting == 0) { - skipping = false; - continue; + if (parenthesesNesting > 0) { + char ch{token.OnlyNonBlank()}; + if (ch == '(') { + ++parenthesesNesting; + } else if (ch == ')') { + if (--parenthesesNesting == 0) { + skipping = false; + continue; + } } } result.Put(replacement_, j); @@ -361,18 +366,16 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement( std::vector<std::size_t> argStart{++k}; for (int nesting{0}; k < tokens; ++k) { CharBlock token{input.TokenAt(k)}; - if (token.size() == 1) { - char ch{token[0]}; - if (ch == '(') { - ++nesting; - } else if (ch == ')') { - if (nesting == 0) { - break; - } - --nesting; - } else if (ch == ',' && nesting == 0) { - argStart.push_back(k + 1); + char 
ch{token.OnlyNonBlank()}; + if (ch == '(') { + ++nesting; + } else if (ch == ')') { + if (nesting == 0) { + break; } + --nesting; + } else if (ch == ',' && nesting == 0) { + argStart.push_back(k + 1); } } if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) { @@ -454,12 +457,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { } nameToken = SaveTokenAsName(nameToken); definitions_.erase(nameToken); - if (++j < tokens && dir.TokenAt(j).size() == 1 && - dir.TokenAt(j)[0] == '(') { + if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') { j = dir.SkipBlanks(j + 1); std::vector<std::string> argName; bool isVariadic{false}; - if (dir.TokenAt(j).ToString() != ")") { + if (dir.TokenAt(j).OnlyNonBlank() != ')') { while (true) { std::string an{dir.TokenAt(j).ToString()}; if (an == "...") { @@ -478,11 +480,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { "#define: malformed argument list"_err_en_US); return; } - std::string punc{dir.TokenAt(j).ToString()}; - if (punc == ")") { + char punc{dir.TokenAt(j).OnlyNonBlank()}; + if (punc == ')') { break; } - if (isVariadic || punc != ",") { + if (isVariadic || punc != ',') { prescanner.Say(dir.GetTokenProvenanceRange(j), "#define: malformed argument list"_err_en_US); return; @@ -502,10 +504,12 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) { } } j = dir.SkipBlanks(j + 1); + CheckForUnbalancedParentheses(dir, j, tokens - j); definitions_.emplace(std::make_pair( nameToken, Definition{argName, dir, j, tokens - j, isVariadic})); } else { j = dir.SkipBlanks(j + 1); + CheckForUnbalancedParentheses(dir, j, tokens - j); definitions_.emplace( std::make_pair(nameToken, Definition{dir, j, tokens - j})); } @@ -883,7 +887,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token, } switch (op) { case PARENS: - if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") { + if (*atToken < tokens 
&& token.TokenAt(*atToken).OnlyNonBlank() == ')') { ++*atToken; break; } @@ -1085,8 +1089,8 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") { CharBlock name; if (j + 3 < expr1.SizeInTokens() && - expr1.TokenAt(j + 1).ToString() == "(" && - expr1.TokenAt(j + 3).ToString() == ")") { + expr1.TokenAt(j + 1).OnlyNonBlank() == '(' && + expr1.TokenAt(j + 3).OnlyNonBlank() == ')') { name = expr1.TokenAt(j + 2); j += 3; } else if (j + 1 < expr1.SizeInTokens() && @@ -1176,4 +1180,24 @@ void Preprocessor::LineDirective( sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber); } } + +void Preprocessor::CheckForUnbalancedParentheses( + const TokenSequence &tokens, std::size_t j, std::size_t n) { + if (!anyMacroWithUnbalancedParentheses_) { + int nesting{0}; + for (; n-- > 0; ++j) { + char ch{tokens.TokenAt(j).OnlyNonBlank()}; + if (ch == '(') { + ++nesting; + } else if (ch == ')') { + if (nesting-- == 0) { + break; + } + } + } + if (nesting != 0) { + anyMacroWithUnbalancedParentheses_ = true; + } + } +} } // namespace Fortran::parser |