diff options
author | Raiki Tamura <tamaron1203@gmail.com> | 2023-06-28 19:14:50 +0900 |
---|---|---|
committer | Arthur Cohen <arthur.cohen@embecosm.com> | 2024-01-16 18:46:31 +0100 |
commit | 84a14f3d88f2568af7922c47f43960bde3904205 (patch) | |
tree | b874d577b5aca01080eb94c367d93a2d66b13e12 /gcc | |
parent | 5373b8e66cc66e0691e60ec7a2ea633ca7e5a9d1 (diff) | |
download | gcc-84a14f3d88f2568af7922c47f43960bde3904205.zip gcc-84a14f3d88f2568af7922c47f43960bde3904205.tar.gz gcc-84a14f3d88f2568af7922c47f43960bde3904205.tar.bz2 |
gccrs: fix tokenizing utf-8 whitespaces
gcc/rust/ChangeLog:
* lex/rust-lex.cc (Lexer::build_token): Add check for all kinds of whitespace.
gcc/testsuite/ChangeLog:
* rust/compile/torture/utf8_whitespaces.rs: New test.
Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 13 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs | 16 |
2 files changed, 27 insertions, 2 deletions
```diff
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index aec2a96..7f7fc0c 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -420,7 +420,10 @@ Lexer::build_token ()
         {
         /* ignore whitespace characters for tokens but continue updating
          * location */
-        case '\n': // newline
+        case '\n':   // newline
+        case 0x0085: // next line
+        case 0x2028: // line separator
+        case 0x2029: // paragraph separator
           current_line++;
           current_column = 1;
           // tell line_table that new line starts
@@ -432,10 +435,16 @@ Lexer::build_token ()
         case ' ': // space
           current_column++;
           continue;
-        case '\t': // tab
+        case '\t': // horizontal tab
           // width of a tab is not well-defined, assume 8 spaces
           current_column += 8;
           continue;
+        case '\v':   // vertical tab
+        case 0x000c: // form feed
+        case 0x200e: // left-to-right mark
+        case 0x200f: // right-to-left mark
+          // Ignored.
+          continue;
 
         // punctuation - actual tokens
         case '=':
diff --git a/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs
new file mode 100644
index 0000000..b45c014
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/utf8_whitespaces.rs
@@ -0,0 +1,16 @@
+fn main() {
+    // FORM FEED
+    
+    // LINE TABULATION (vt)
+    
+    // NEXT LINE (nel)
+    
+    // LEFT-TO-RIGHT MARK
+    
+    // RIGHT-TO-LEFT MARK
+    
+    // LINE SEPARATOR
+    
+    // PARAGRAPH SEPARATOR
+    
+}
```