aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust
diff options
context:
space:
mode:
author: Raiki Tamura <tamaron1203@gmail.com> 2023-06-28 19:14:50 +0900
committer: CohenArthur <arthur.cohen@embecosm.com> 2023-06-29 12:25:26 +0000
commit: 9e4c3ed357eed2224925169530433af1cd66a307 (patch)
tree: 026ffd2c5a2dcf0606c2703b0bbc3b8b12504646 /gcc/rust
parent: 09512e613540ca1b8162030b6b4dd381c36e79eb (diff)
download: gcc-9e4c3ed357eed2224925169530433af1cd66a307.zip
          gcc-9e4c3ed357eed2224925169530433af1cd66a307.tar.gz
          gcc-9e4c3ed357eed2224925169530433af1cd66a307.tar.bz2
gccrs: fix tokenizing utf-8 whitespaces
gcc/rust/ChangeLog:

	* lex/rust-lex.cc (Lexer::build_token): add check for all kinds of whitespaces

gcc/testsuite/ChangeLog:

	* rust/compile/torture/utf8_whitespaces.rs: New test.

Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc/rust')
-rw-r--r-- gcc/rust/lex/rust-lex.cc | 13
1 files changed, 11 insertions, 2 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 43acdf0..28f3863 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -420,7 +420,10 @@ Lexer::build_token ()
{
/* ignore whitespace characters for tokens but continue updating
* location */
- case '\n': // newline
+ case '\n': // newline
+ case 0x0085: // next line
+ case 0x2028: // line separator
+ case 0x2029: // paragraph separator
current_line++;
current_column = 1;
// tell line_table that new line starts
@@ -432,10 +435,16 @@ Lexer::build_token ()
case ' ': // space
current_column++;
continue;
- case '\t': // tab
+ case '\t': // horizontal tab
// width of a tab is not well-defined, assume 8 spaces
current_column += 8;
continue;
+ case '\v': // vertical tab
+ case 0x000c: // form feed
+ case 0x200e: // left-to-right mark
+ case 0x200f: // right-to-left mark
+ // Ignored.
+ continue;
// punctuation - actual tokens
case '=':