aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Raiki Tamura <tamaron1203@gmail.com> 2023-06-28 19:14:50 +0900
committer: CohenArthur <arthur.cohen@embecosm.com> 2023-06-29 12:25:26 +0000
commit: 9e4c3ed357eed2224925169530433af1cd66a307 (patch)
tree: 026ffd2c5a2dcf0606c2703b0bbc3b8b12504646 /gcc
parent: 09512e613540ca1b8162030b6b4dd381c36e79eb (diff)
download: gcc-9e4c3ed357eed2224925169530433af1cd66a307.zip
gcc-9e4c3ed357eed2224925169530433af1cd66a307.tar.gz
gcc-9e4c3ed357eed2224925169530433af1cd66a307.tar.bz2
gccrs: fix tokenizing utf-8 whitespaces
gcc/rust/ChangeLog:
	* lex/rust-lex.cc (Lexer::build_token): Add check for all kinds of whitespace.
gcc/testsuite/ChangeLog:
	* rust/compile/torture/utf8_whitespaces.rs: New test.
Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/rust/lex/rust-lex.cc | 13
-rw-r--r-- gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs | 16
2 files changed, 27 insertions, 2 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 43acdf0..28f3863 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -420,7 +420,10 @@ Lexer::build_token ()
{
/* ignore whitespace characters for tokens but continue updating
* location */
- case '\n': // newline
+ case '\n': // newline
+ case 0x0085: // next line
+ case 0x2028: // line separator
+ case 0x2029: // paragraph separator
current_line++;
current_column = 1;
// tell line_table that new line starts
@@ -432,10 +435,16 @@ Lexer::build_token ()
case ' ': // space
current_column++;
continue;
- case '\t': // tab
+ case '\t': // horizontal tab
// width of a tab is not well-defined, assume 8 spaces
current_column += 8;
continue;
+ case '\v': // vertical tab
+ case 0x000c: // form feed
+ case 0x200e: // left-to-right mark
+ case 0x200f: // right-to-left mark
+ // Ignored.
+ continue;
// punctuation - actual tokens
case '=':
diff --git a/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs
new file mode 100644
index 0000000..b45c014
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs
@@ -0,0 +1,16 @@
+fn main() {
+ // FORM FEED
+
+ // LINE TABULATION (vt)
+
+ // NEXT LINE (nel)
+ …
+ // LEFT-TO-RIGHT MARK
+ ‎
+ // RIGHT-TO-LEFT MARK
+ ‏
+ // LINE SEPARATOR
+ 

+ // PARAGRAPH SEPARATOR
+ 

+}