diff options
author | Mark Wielaard <mark@klomp.org> | 2021-07-05 21:11:03 +0200 |
---|---|---|
committer | Mark Wielaard <mark@klomp.org> | 2021-07-05 21:11:03 +0200 |
commit | 29192293fdce8b3cf4888bbe0d5f504655e26cd5 (patch) | |
tree | c79b390d50c717ad4e1abd2c262a7b53de69a467 /gcc | |
parent | 27b3d34428801397e562b7fcc5ca10b13961f3e1 (diff) | |
download | gcc-29192293fdce8b3cf4888bbe0d5f504655e26cd5.zip gcc-29192293fdce8b3cf4888bbe0d5f504655e26cd5.tar.gz gcc-29192293fdce8b3cf4888bbe0d5f504655e26cd5.tar.bz2 |
Handle UTF-8 BOM in lexer
The very first thing in a Rust source file might be the optional UTF-8
BOM (byte order mark), which is the three bytes 0xEF 0xBB 0xBF. These can
simply be skipped; they just mark the file as UTF-8. Add some test cases
to show that we now handle such files.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 13 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/bom.rs | 1 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/bom_comment.rs | 2 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/bom_shebang.rs | 2 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/bom_whitespace.rs | 2 |
5 files changed, 20 insertions, 0 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index ebd69de..617dd69 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -237,6 +237,19 @@ Lexer::build_token () current_char = peek_input (); skip_input (); + // detect UTF8 bom + // + // Must be the first thing on the first line. + // There might be an optional BOM (Byte Order Mark), which for UTF-8 is + // the three bytes 0xEF, 0xBB and 0xBF. These can simply be skipped. + if (current_line == 1 && current_column == 1 && current_char == 0xef + && peek_input () == 0xbb && peek_input (1) == 0xbf) + { + skip_input (1); + current_char = peek_input (); + skip_input (); + } + // detect shebang // Must be the first thing on the first line, starting with #! // But since an attribute can also start with an #! we don't count it as a diff --git a/gcc/testsuite/rust/compile/torture/bom.rs b/gcc/testsuite/rust/compile/torture/bom.rs new file mode 100644 index 0000000..5edcab2 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/bom.rs @@ -0,0 +1 @@ +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/bom_comment.rs b/gcc/testsuite/rust/compile/torture/bom_comment.rs new file mode 100644 index 0000000..020e170 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/bom_comment.rs @@ -0,0 +1,2 @@ +// UTF8 BOM +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/bom_shebang.rs b/gcc/testsuite/rust/compile/torture/bom_shebang.rs new file mode 100644 index 0000000..4c552e8 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/bom_shebang.rs @@ -0,0 +1,2 @@ +#!/usr/bin/cat +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/bom_whitespace.rs b/gcc/testsuite/rust/compile/torture/bom_whitespace.rs new file mode 100644 index 0000000..b10d565 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/bom_whitespace.rs @@ -0,0 +1,2 @@ + +pub fn main () { } |