about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Raiki Tamura <tamaron1203@gmail.com> 2023-06-28 19:14:50 +0900
committer: Arthur Cohen <arthur.cohen@embecosm.com> 2024-01-16 18:46:31 +0100
commit: 84a14f3d88f2568af7922c47f43960bde3904205 (patch)
tree: b874d577b5aca01080eb94c367d93a2d66b13e12 /gcc
parent: 5373b8e66cc66e0691e60ec7a2ea633ca7e5a9d1 (diff)
download: gcc-84a14f3d88f2568af7922c47f43960bde3904205.zip
download: gcc-84a14f3d88f2568af7922c47f43960bde3904205.tar.gz
download: gcc-84a14f3d88f2568af7922c47f43960bde3904205.tar.bz2
gccrs: fix tokenizing utf-8 whitespaces

gcc/rust/ChangeLog:

	* lex/rust-lex.cc (Lexer::build_token): add check for all kinds of
	whitespaces

gcc/testsuite/ChangeLog:

	* rust/compile/torture/utf8_whitespaces.rs: New test.

Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>

Diffstat (limited to 'gcc')
-rw-r--r-- gcc/rust/lex/rust-lex.cc | 13
-rw-r--r-- gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs | 16
2 files changed, 27 insertions, 2 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index aec2a96..7f7fc0c 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -420,7 +420,10 @@ Lexer::build_token ()
{
/* ignore whitespace characters for tokens but continue updating
* location */
- case '\n': // newline
+ case '\n': // newline
+ case 0x0085: // next line
+ case 0x2028: // line separator
+ case 0x2029: // paragraph separator
current_line++;
current_column = 1;
// tell line_table that new line starts
@@ -432,10 +435,16 @@ Lexer::build_token ()
case ' ': // space
current_column++;
continue;
- case '\t': // tab
+ case '\t': // horizontal tab
// width of a tab is not well-defined, assume 8 spaces
current_column += 8;
continue;
+ case '\v': // vertical tab
+ case 0x000c: // form feed
+ case 0x200e: // left-to-right mark
+ case 0x200f: // right-to-left mark
+ // Ignored.
+ continue;
// punctuation - actual tokens
case '=':
diff --git a/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs
new file mode 100644
index 0000000..b45c014
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs
@@ -0,0 +1,16 @@
+fn main() {
+ // FORM FEED
+
+ // LINE TABULATION (vt)
+
+ // NEXT LINE (nel)
+ …
+ // LEFT-TO-RIGHT MARK
+ ‎
+ // RIGHT-TO-LEFT MARK
+ ‏
+ // LINE SEPARATOR
+ 

+ // PARAGRAPH SEPARATOR
+ 

+}