[flang] Fix character initialization after continuation

The insertion of a space on a line continuation right before a character literal was confusing TokenSequence::ToLowerCase(), which was unable to identify the character literal as such, causing it to be converted to lower case. Fix this by skipping spaces at the beginning and end of each token before testing for the token type.

Fixes https://github.com/llvm/llvm-project/issues/62039
Reviewed By: klausler
Differential Revision: https://reviews.llvm.org/D151885
author: Leandro Lupori <leandro.lupori@linaro.org> 2023-05-31 22:11:51 +0000
committer: Leandro Lupori <leandro.lupori@linaro.org> 2023-06-01 16:48:11 -0300
commit: 0e1fa9174072a3b896533d151336884e4eb1486a (patch)
tree: b407e7267e9da4b544107d0a518f8d95b81c0d57 /flang/lib/Parser/token-sequence.cpp
parent: 81ea6b7e4b6c374c026dcf1ce742db36de2e56d4 (diff)
download: llvm-0e1fa9174072a3b896533d151336884e4eb1486a.zip
llvm-0e1fa9174072a3b896533d151336884e4eb1486a.tar.gz
llvm-0e1fa9174072a3b896533d151336884e4eb1486a.tar.bz2
1 files changed, 13 insertions, 4 deletions
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index eaa2bf3..f94c8142 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -155,7 +155,16 @@ TokenSequence &TokenSequence::ToLowerCase() {
     std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
     char *p{&char_[j]};
     char const *limit{char_.data() + nextStart};
+    const char *lastChar{limit - 1};
     j = nextStart;
+    // Skip leading whitespaces
+    while (p < limit - 1 && *p == ' ') {
+      ++p;
+    }
+    // Find last non-whitespace char
+    while (lastChar > p + 1 && *lastChar == ' ') {
+      --lastChar;
+    }
     if (IsDecimalDigit(*p)) {
       while (p < limit && IsDecimalDigit(*p)) {
         ++p;
@@ -172,17 +181,17 @@ TokenSequence &TokenSequence::ToLowerCase() {
           *p = ToLowerCaseLetter(*p);
         }
       }
-    } else if (limit[-1] == '\'' || limit[-1] == '"') {
-      if (*p == limit[-1]) {
+    } else if (*lastChar == '\'' || *lastChar == '"') {
+      if (*p == *lastChar) {
         // Character literal without prefix
-      } else if (p[1] == limit[-1]) {
+      } else if (p[1] == *lastChar) {
         // BOZX-prefixed constant
         for (; p < limit; ++p) {
           *p = ToLowerCaseLetter(*p);
         }
       } else {
         // Literal with kind-param prefix name (e.g., K_"ABC").
-        for (; *p != limit[-1]; ++p) {
+        for (; *p != *lastChar; ++p) {
           *p = ToLowerCaseLetter(*p);
         }
       }
author	Leandro Lupori <leandro.lupori@linaro.org>	2023-05-31 22:11:51 +0000
committer	Leandro Lupori <leandro.lupori@linaro.org>	2023-06-01 16:48:11 -0300
commit	0e1fa9174072a3b896533d151336884e4eb1486a (patch)
tree	b407e7267e9da4b544107d0a518f8d95b81c0d57 /flang/lib/Parser/token-sequence.cpp
parent	81ea6b7e4b6c374c026dcf1ce742db36de2e56d4 (diff)
download	llvm-0e1fa9174072a3b896533d151336884e4eb1486a.zip llvm-0e1fa9174072a3b896533d151336884e4eb1486a.tar.gz llvm-0e1fa9174072a3b896533d151336884e4eb1486a.tar.bz2