diff options
author | Raiki Tamura <tamaron1203@gmail.com> | 2023-06-28 19:14:50 +0900 |
---|---|---|
committer | CohenArthur <arthur.cohen@embecosm.com> | 2023-06-29 12:25:26 +0000 |
commit | 9e4c3ed357eed2224925169530433af1cd66a307 (patch) | |
tree | 026ffd2c5a2dcf0606c2703b0bbc3b8b12504646 /gcc | |
parent | 09512e613540ca1b8162030b6b4dd381c36e79eb (diff) | |
download | gcc-9e4c3ed357eed2224925169530433af1cd66a307.zip gcc-9e4c3ed357eed2224925169530433af1cd66a307.tar.gz gcc-9e4c3ed357eed2224925169530433af1cd66a307.tar.bz2 |
gccrs: fix tokenizing of UTF-8 whitespace
gcc/rust/ChangeLog:
* lex/rust-lex.cc (Lexer::build_token): Add check for all kinds of whitespace.
gcc/testsuite/ChangeLog:
* rust/compile/torture/utf8_whitespaces.rs: New test.
Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 13 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs | 16 |
2 files changed, 27 insertions, 2 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 43acdf0..28f3863 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -420,7 +420,10 @@ Lexer::build_token () { /* ignore whitespace characters for tokens but continue updating * location */ - case '\n': // newline + case '\n': // newline + case 0x0085: // next line + case 0x2028: // line separator + case 0x2029: // paragraph separator current_line++; current_column = 1; // tell line_table that new line starts @@ -432,10 +435,16 @@ Lexer::build_token () case ' ': // space current_column++; continue; - case '\t': // tab + case '\t': // horizontal tab // width of a tab is not well-defined, assume 8 spaces current_column += 8; continue; + case '\v': // vertical tab + case 0x000c: // form feed + case 0x200e: // left-to-right mark + case 0x200f: // right-to-left mark + // Ignored. + continue; // punctuation - actual tokens case '=': diff --git a/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs new file mode 100644 index 0000000..b45c014 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs @@ -0,0 +1,16 @@ +fn main() { + // FORM FEED + + // LINE TABULATION (vt) + + // NEXT LINE (nel) +
+ // LEFT-TO-RIGHT MARK + + // RIGHT-TO-LEFT MARK + + // LINE SEPARATOR +
+ // PARAGRAPH SEPARATOR +
+} |