aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust
diff options
context:
space:
mode:
authorMark Wielaard <mark@klomp.org>2021-07-04 23:22:32 +0200
committerMark Wielaard <mark@klomp.org>2021-07-04 23:56:06 +0200
commitff35f162daebd1ac6538aa74c0f270f8e19902de (patch)
tree487d1b338b5a4060bbcb605d0c9e298475d1587a /gcc/rust
parent210ae4f7b0fea9671482b8f01354fd5b9274f878 (diff)
downloadgcc-ff35f162daebd1ac6538aa74c0f270f8e19902de.zip
gcc-ff35f162daebd1ac6538aa74c0f270f8e19902de.tar.gz
gcc-ff35f162daebd1ac6538aa74c0f270f8e19902de.tar.bz2
Handle shebang line, plus any whitespace and comment skipping in lexer
The lexer tried to handle the shebang line but used loc directly, instead of the current_column. And it assumed a '/' should immediately follow the "#!". But if the "#!" is followed by whitespace and/or comments and a '[' character, then the first line isn't seen as a shebang line (even if the kernel or shell would) but as the start of an inner attribute. Add various tests for when the first line starting with "#!" is seen as a shebang line (and should be skipped). And some tests for when there is a '[' character following some whitespace and/or comments and the "#!" is seen as part of an inner attribute.
Diffstat (limited to 'gcc/rust')
-rw-r--r--gcc/rust/lex/rust-lex.cc79
1 files changed, 60 insertions, 19 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index d138416..ebd69de 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -237,28 +237,63 @@ Lexer::build_token ()
current_char = peek_input ();
skip_input ();
- // return end of file token if end of file
- if (current_char == EOF)
- return Token::make (END_OF_FILE, loc);
-
// detect shebang
- if (loc == 1 && current_line == 1 && current_char == '#')
+ // Must be the first thing on the first line, starting with #!
+ // But since an attribute can also start with an #! we don't count it as a
+ // shebang line when after any whitespace or comments there is a [. If it
+ // is a shebang line we simply drop the line. Otherwise we don't consume
+ // any characters and fall through to the real tokenizer.
+ if (current_line == 1 && current_column == 1 && current_char == '#'
+ && peek_input () == '!')
{
- current_char = peek_input ();
-
- if (current_char == '!')
+ int n = 1;
+ while (true)
{
- skip_input ();
- current_char = peek_input ();
-
- if (current_char == '/')
+ int next_char = peek_input (n);
+ if (is_whitespace (next_char))
+ n++;
+ else if (next_char == '/' && peek_input (n + 1) == '/')
{
- // definitely shebang
-
- skip_input ();
-
- // ignore rest of line
- while (current_char != '\n')
+ // A single line comment
+ n += 2;
+ next_char = peek_input (n);
+ while (next_char != '\n' && next_char != EOF)
+ {
+ n++;
+ next_char = peek_input (n);
+ }
+ if (next_char == '\n')
+ n++;
+ }
+ else if (next_char == '/' && peek_input (n + 1) == '*')
+ {
+ // Start of a block comment
+ n += 2;
+ int level = 1;
+ while (level > 0)
+ {
+ if (peek_input (n) == EOF)
+ break;
+ else if (peek_input (n) == '/'
+ && peek_input (n + 1) == '*')
+ {
+ n += 2;
+ level += 1;
+ }
+ else if (peek_input (n) == '*'
+ && peek_input (n + 1) == '/')
+ {
+ n += 2;
+ level -= 1;
+ }
+ else
+ n++;
+ }
+ }
+ else if (next_char != '[')
+ {
+ // definitely shebang, ignore the first line
+ while (current_char != '\n' && current_char != EOF)
{
current_char = peek_input ();
skip_input ();
@@ -269,11 +304,17 @@ Lexer::build_token ()
current_column = 1;
// tell line_table that new line starts
line_map->start_line (current_line, max_column_hint);
- continue;
+ break;
}
+ else
+ break; /* Definitely not a shebang line. */
}
}
+ // return end of file token if end of file
+ if (current_char == EOF)
+ return Token::make (END_OF_FILE, loc);
+
// if not end of file, start tokenising
switch (current_char)
{