[flang] Stricter "implicit continuation" in preprocessing

The prescanner performs implicit line continuation when it looks like the parenthesized arguments of a call to a function-like macro may span multiple lines. In an attempt to work more like a Fortran-oblivious C preprocessor, the prescanner will act as if the following lines had been continuations so that the function-like macro could be invoked. This still seems like a good idea, but a recent bug report on LLVM's GitHub issue tracker shows one way in which it could trigger inadvertently and mess up a program. So this patch makes the conditions for implicit line continuation much more strict. First, the leading parenthesis has to have been preceded by an identifier that's known to be a macro name. (It doesn't have to be a function-like macro, since it's possible for a keyword-like macro to expand to the name of a function-like macro.) Second, no macro definition can ever have had unbalanced parentheses in its replacement text. Also cleans up some parenthesis recognition code to fix some issues found in testing, so that a token with leading or trailing spaces can still be recognized as a parenthesis or comma. Fixes https://github.com/llvm/llvm-project/issues/63844. Differential Revision: https://reviews.llvm.org/D155499
author: Peter Klausler <pklausler@nvidia.com> 2023-07-13 16:26:23 -0700
committer: Peter Klausler <pklausler@nvidia.com> 2023-07-31 14:22:43 -0700
commit: 6fac3f7b2e9415d181b8d21af57f4e6a312385a5 (patch)
tree: 8fe14fe75aef2f77a18c24186dbc6165d086640d /flang/lib/Parser/preprocessor.cpp
parent: 299ec3c22a3b4bdfa61502c1485b9dda9db1cf59 (diff)
download: llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.zip
llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.gz
llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.bz2
1 file changed, 56 insertions, 32 deletions
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 9197906..d755605 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -147,12 +147,14 @@ TokenSequence Definition::Apply(
     CharBlock token{replacement_.TokenAt(j)};
     std::size_t bytes{token.size()};
     if (skipping) {
-      if (bytes == 1) {
-        if (token[0] == '(') {
-          ++parenthesesNesting;
-        } else if (token[0] == ')') {
-          skipping = --parenthesesNesting > 0;
+      char ch{token.OnlyNonBlank()};
+      if (ch == '(') {
+        ++parenthesesNesting;
+      } else if (ch == ')') {
+        if (parenthesesNesting > 0) {
+          --parenthesesNesting;
         }
+        skipping = parenthesesNesting > 0;
       }
       continue;
     }
@@ -207,18 +209,21 @@ TokenSequence Definition::Apply(
         result.Put(args[k]);
       }
     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
-        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
+        j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
         parenthesesNesting == 0) {
       parenthesesNesting = 1;
       skipping = args.size() == argumentCount_;
       ++j;
     } else {
-      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
-        ++parenthesesNesting;
-      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
-        if (--parenthesesNesting == 0) {
-          skipping = false;
-          continue;
+      if (parenthesesNesting > 0) {
+        char ch{token.OnlyNonBlank()};
+        if (ch == '(') {
+          ++parenthesesNesting;
+        } else if (ch == ')') {
+          if (--parenthesesNesting == 0) {
+            skipping = false;
+            continue;
+          }
         }
       }
       result.Put(replacement_, j);
@@ -361,18 +366,16 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
     std::vector<std::size_t> argStart{++k};
     for (int nesting{0}; k < tokens; ++k) {
       CharBlock token{input.TokenAt(k)};
-      if (token.size() == 1) {
-        char ch{token[0]};
-        if (ch == '(') {
-          ++nesting;
-        } else if (ch == ')') {
-          if (nesting == 0) {
-            break;
-          }
-          --nesting;
-        } else if (ch == ',' && nesting == 0) {
-          argStart.push_back(k + 1);
+      char ch{token.OnlyNonBlank()};
+      if (ch == '(') {
+        ++nesting;
+      } else if (ch == ')') {
+        if (nesting == 0) {
+          break;
         }
+        --nesting;
+      } else if (ch == ',' && nesting == 0) {
+        argStart.push_back(k + 1);
       }
     }
     if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
@@ -454,12 +457,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
     }
     nameToken = SaveTokenAsName(nameToken);
     definitions_.erase(nameToken);
-    if (++j < tokens && dir.TokenAt(j).size() == 1 &&
-        dir.TokenAt(j)[0] == '(') {
+    if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') {
       j = dir.SkipBlanks(j + 1);
       std::vector<std::string> argName;
       bool isVariadic{false};
-      if (dir.TokenAt(j).ToString() != ")") {
+      if (dir.TokenAt(j).OnlyNonBlank() != ')') {
         while (true) {
           std::string an{dir.TokenAt(j).ToString()};
           if (an == "...") {
@@ -478,11 +480,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
                 "#define: malformed argument list"_err_en_US);
             return;
           }
-          std::string punc{dir.TokenAt(j).ToString()};
-          if (punc == ")") {
+          char punc{dir.TokenAt(j).OnlyNonBlank()};
+          if (punc == ')') {
             break;
           }
-          if (isVariadic || punc != ",") {
+          if (isVariadic || punc != ',') {
             prescanner.Say(dir.GetTokenProvenanceRange(j),
                 "#define: malformed argument list"_err_en_US);
             return;
@@ -502,10 +504,12 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
         }
       }
       j = dir.SkipBlanks(j + 1);
+      CheckForUnbalancedParentheses(dir, j, tokens - j);
       definitions_.emplace(std::make_pair(
           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
     } else {
       j = dir.SkipBlanks(j + 1);
+      CheckForUnbalancedParentheses(dir, j, tokens - j);
       definitions_.emplace(
           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
     }
@@ -883,7 +887,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
     }
     switch (op) {
     case PARENS:
-      if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
+      if (*atToken < tokens && token.TokenAt(*atToken).OnlyNonBlank() == ')') {
         ++*atToken;
         break;
       }
@@ -1085,8 +1089,8 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
       CharBlock name;
       if (j + 3 < expr1.SizeInTokens() &&
-          expr1.TokenAt(j + 1).ToString() == "(" &&
-          expr1.TokenAt(j + 3).ToString() == ")") {
+          expr1.TokenAt(j + 1).OnlyNonBlank() == '(' &&
+          expr1.TokenAt(j + 3).OnlyNonBlank() == ')') {
         name = expr1.TokenAt(j + 2);
         j += 3;
       } else if (j + 1 < expr1.SizeInTokens() &&
@@ -1176,4 +1180,24 @@ void Preprocessor::LineDirective(
     sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber);
   }
 }
+
+void Preprocessor::CheckForUnbalancedParentheses( // records whether a token range has unbalanced parentheses
+    const TokenSequence &tokens, std::size_t j, std::size_t n) { // examines n tokens of `tokens`, starting at index j
+  if (!anyMacroWithUnbalancedParentheses_) { // flag already set: this function has nothing further to record
+    int nesting{0}; // number of '(' tokens seen so far that no ')' has closed
+    for (; n-- > 0; ++j) {
+      char ch{tokens.TokenAt(j).OnlyNonBlank()}; // presumably the token's sole non-blank character, so a padded "( " still matches -- confirm in CharBlock
+      if (ch == '(') {
+        ++nesting;
+      } else if (ch == ')') {
+        if (nesting-- == 0) { // ')' with no open '(': nesting drops to -1, so the test after the loop fires
+          break;
+        }
+      }
+    }
+    if (nesting != 0) { // positive: an unclosed '('; negative: a stray ')'
+      anyMacroWithUnbalancedParentheses_ = true;
+    }
+  }
+}
 } // namespace Fortran::parser
author	Peter Klausler <pklausler@nvidia.com>	2023-07-13 16:26:23 -0700
committer	Peter Klausler <pklausler@nvidia.com>	2023-07-31 14:22:43 -0700
commit	6fac3f7b2e9415d181b8d21af57f4e6a312385a5 (patch)
tree	8fe14fe75aef2f77a18c24186dbc6165d086640d /flang/lib/Parser/preprocessor.cpp
parent	299ec3c22a3b4bdfa61502c1485b9dda9db1cf59 (diff)
download	llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.zip llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.gz llvm-6fac3f7b2e9415d181b8d21af57f4e6a312385a5.tar.bz2