aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2021-07-05 14:49:56 +0000
committerGitHub <noreply@github.com>2021-07-05 14:49:56 +0000
commit27b3d34428801397e562b7fcc5ca10b13961f3e1 (patch)
treec88d5c958ebb9f5a80d2ab7945d6cf0f29181553 /gcc
parent3c7c2f427c8cc69c67f53281391f954df212914a (diff)
parentafe6eb7d16b1c2336381c74e7d416c57129e88c6 (diff)
downloadgcc-27b3d34428801397e562b7fcc5ca10b13961f3e1.zip
gcc-27b3d34428801397e562b7fcc5ca10b13961f3e1.tar.gz
gcc-27b3d34428801397e562b7fcc5ca10b13961f3e1.tar.bz2
Merge #546
546: shebang handling r=philberty a=dkm Mark Wielaard: Shebang handling (skipping a first line that starts with #!) was not done fully correctly, and it isn't necessary to keep track of the shebang line in the AST or HIR Crate classes. Because an inner attribute also starts with #!, the first line isn't regarded as a shebang line if the #! is followed by (optional) whitespace and comments and then a [. In that case the #! is seen as the start of an inner attribute. I added various testcases that hopefully show the funny things you can get when the first line starts with #!. Co-authored-by: Mark Wielaard <mark@klomp.org>
Diffstat (limited to 'gcc')
-rw-r--r--gcc/rust/ast/rust-ast-full-test.cc5
-rw-r--r--gcc/rust/ast/rust-ast.h13
-rw-r--r--gcc/rust/hir/rust-ast-lower.cc3
-rw-r--r--gcc/rust/hir/tree/rust-hir-full-test.cc6
-rw-r--r--gcc/rust/hir/tree/rust-hir.h14
-rw-r--r--gcc/rust/lex/rust-lex.cc79
-rw-r--r--gcc/rust/parse/rust-parse-impl.h12
-rw-r--r--gcc/testsuite/rust/compile/torture/not_shebang.rs3
-rw-r--r--gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs1
-rw-r--r--gcc/testsuite/rust/compile/torture/not_shebang_comment.rs3
-rw-r--r--gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs7
-rw-r--r--gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs6
-rwxr-xr-xgcc/testsuite/rust/compile/torture/shebang.rs3
-rwxr-xr-xgcc/testsuite/rust/compile/torture/shebang_plus_attr.rs3
-rwxr-xr-xgcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs3
15 files changed, 107 insertions, 54 deletions
diff --git a/gcc/rust/ast/rust-ast-full-test.cc b/gcc/rust/ast/rust-ast-full-test.cc
index 3d339ad1..12ef255 100644
--- a/gcc/rust/ast/rust-ast-full-test.cc
+++ b/gcc/rust/ast/rust-ast-full-test.cc
@@ -172,13 +172,10 @@ Crate::as_string () const
rust_debug ("beginning crate recursive as-string");
std::string str ("Crate: ");
- // add utf8bom and shebang
+ // add utf8bom
if (has_utf8bom)
str += "\n has utf8bom";
- if (has_shebang)
- str += "\n has shebang";
-
// inner attributes
str += append_attributes (inner_attrs, INNER);
diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h
index 0e25de2..ce55e1b 100644
--- a/gcc/rust/ast/rust-ast.h
+++ b/gcc/rust/ast/rust-ast.h
@@ -1551,7 +1551,6 @@ protected:
struct Crate
{
bool has_utf8bom;
- bool has_shebang;
std::vector<Attribute> inner_attrs;
// dodgy spacing required here
@@ -1564,17 +1563,16 @@ struct Crate
public:
// Constructor
Crate (std::vector<std::unique_ptr<Item> > items,
- std::vector<Attribute> inner_attrs, bool has_utf8bom = false,
- bool has_shebang = false)
- : has_utf8bom (has_utf8bom), has_shebang (has_shebang),
- inner_attrs (std::move (inner_attrs)), items (std::move (items)),
+ std::vector<Attribute> inner_attrs, bool has_utf8bom = false)
+ : has_utf8bom (has_utf8bom), inner_attrs (std::move (inner_attrs)),
+ items (std::move (items)),
node_id (Analysis::Mappings::get ()->get_next_node_id ())
{}
// Copy constructor with vector clone
Crate (Crate const &other)
- : has_utf8bom (other.has_utf8bom), has_shebang (other.has_shebang),
- inner_attrs (other.inner_attrs), node_id (other.node_id)
+ : has_utf8bom (other.has_utf8bom), inner_attrs (other.inner_attrs),
+ node_id (other.node_id)
{
items.reserve (other.items.size ());
for (const auto &e : other.items)
@@ -1587,7 +1585,6 @@ public:
Crate &operator= (Crate const &other)
{
inner_attrs = other.inner_attrs;
- has_shebang = other.has_shebang;
has_utf8bom = other.has_utf8bom;
node_id = other.node_id;
diff --git a/gcc/rust/hir/rust-ast-lower.cc b/gcc/rust/hir/rust-ast-lower.cc
index c7222e2..0f3c86d 100644
--- a/gcc/rust/hir/rust-ast-lower.cc
+++ b/gcc/rust/hir/rust-ast-lower.cc
@@ -41,7 +41,6 @@ ASTLowering::go ()
{
std::vector<std::unique_ptr<HIR::Item> > items;
bool has_utf8bom = false;
- bool has_shebang = false;
for (auto it = astCrate.items.begin (); it != astCrate.items.end (); it++)
{
@@ -57,7 +56,7 @@ ASTLowering::go ()
UNKNOWN_LOCAL_DEFID);
return HIR::Crate (std::move (items), astCrate.get_inner_attrs (), mapping,
- has_utf8bom, has_shebang);
+ has_utf8bom);
}
// rust-ast-lower-block.h
diff --git a/gcc/rust/hir/tree/rust-hir-full-test.cc b/gcc/rust/hir/tree/rust-hir-full-test.cc
index 261b3af..051ba87 100644
--- a/gcc/rust/hir/tree/rust-hir-full-test.cc
+++ b/gcc/rust/hir/tree/rust-hir-full-test.cc
@@ -73,15 +73,11 @@ std::string
Crate::as_string () const
{
std::string str ("HIR::Crate: ");
- // add utf8bom and shebang
+ // add utf8bom
if (has_utf8bom)
{
str += "\n has utf8bom";
}
- if (has_shebang)
- {
- str += "\n has shebang";
- }
// inner attributes
str += "\n inner attributes: ";
diff --git a/gcc/rust/hir/tree/rust-hir.h b/gcc/rust/hir/tree/rust-hir.h
index 35dc71a..f918f2d 100644
--- a/gcc/rust/hir/tree/rust-hir.h
+++ b/gcc/rust/hir/tree/rust-hir.h
@@ -679,7 +679,6 @@ public:
struct Crate
{
bool has_utf8bom;
- bool has_shebang;
AST::AttrVec inner_attrs;
// dodgy spacing required here
@@ -692,17 +691,15 @@ struct Crate
public:
// Constructor
Crate (std::vector<std::unique_ptr<Item> > items, AST::AttrVec inner_attrs,
- Analysis::NodeMapping mappings, bool has_utf8bom = false,
- bool has_shebang = false)
- : has_utf8bom (has_utf8bom), has_shebang (has_shebang),
- inner_attrs (std::move (inner_attrs)), items (std::move (items)),
- mappings (mappings)
+ Analysis::NodeMapping mappings, bool has_utf8bom = false)
+ : has_utf8bom (has_utf8bom), inner_attrs (std::move (inner_attrs)),
+ items (std::move (items)), mappings (mappings)
{}
// Copy constructor with vector clone
Crate (Crate const &other)
- : has_utf8bom (other.has_utf8bom), has_shebang (other.has_shebang),
- inner_attrs (other.inner_attrs), mappings (other.mappings)
+ : has_utf8bom (other.has_utf8bom), inner_attrs (other.inner_attrs),
+ mappings (other.mappings)
{
items.reserve (other.items.size ());
for (const auto &e : other.items)
@@ -715,7 +712,6 @@ public:
Crate &operator= (Crate const &other)
{
inner_attrs = other.inner_attrs;
- has_shebang = other.has_shebang;
has_utf8bom = other.has_utf8bom;
mappings = other.mappings;
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index d138416..ebd69de 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -237,28 +237,63 @@ Lexer::build_token ()
current_char = peek_input ();
skip_input ();
- // return end of file token if end of file
- if (current_char == EOF)
- return Token::make (END_OF_FILE, loc);
-
// detect shebang
- if (loc == 1 && current_line == 1 && current_char == '#')
+ // Must be the first thing on the first line, starting with #!
+ // But since an attribute can also start with an #! we don't count it as a
+ // shebang line when after any whitespace or comments there is a [. If it
+ // is a shebang line we simply drop the line. Otherwise we don't consume
+ // any characters and fall through to the real tokenizer.
+ if (current_line == 1 && current_column == 1 && current_char == '#'
+ && peek_input () == '!')
{
- current_char = peek_input ();
-
- if (current_char == '!')
+ int n = 1;
+ while (true)
{
- skip_input ();
- current_char = peek_input ();
-
- if (current_char == '/')
+ int next_char = peek_input (n);
+ if (is_whitespace (next_char))
+ n++;
+ else if (next_char == '/' && peek_input (n + 1) == '/')
{
- // definitely shebang
-
- skip_input ();
-
- // ignore rest of line
- while (current_char != '\n')
+ // A single line comment
+ n += 2;
+ next_char = peek_input (n);
+ while (next_char != '\n' && next_char != EOF)
+ {
+ n++;
+ next_char = peek_input (n);
+ }
+ if (next_char == '\n')
+ n++;
+ }
+ else if (next_char == '/' && peek_input (n + 1) == '*')
+ {
+ // Start of a block comment
+ n += 2;
+ int level = 1;
+ while (level > 0)
+ {
+ if (peek_input (n) == EOF)
+ break;
+ else if (peek_input (n) == '/'
+ && peek_input (n + 1) == '*')
+ {
+ n += 2;
+ level += 1;
+ }
+ else if (peek_input (n) == '*'
+ && peek_input (n + 1) == '/')
+ {
+ n += 2;
+ level -= 1;
+ }
+ else
+ n++;
+ }
+ }
+ else if (next_char != '[')
+ {
+ // definitely shebang, ignore the first line
+ while (current_char != '\n' && current_char != EOF)
{
current_char = peek_input ();
skip_input ();
@@ -269,11 +304,17 @@ Lexer::build_token ()
current_column = 1;
// tell line_table that new line starts
line_map->start_line (current_line, max_column_hint);
- continue;
+ break;
}
+ else
+ break; /* Definitely not a shebang line. */
}
}
+ // return end of file token if end of file
+ if (current_char == EOF)
+ return Token::make (END_OF_FILE, loc);
+
// if not end of file, start tokenising
switch (current_char)
{
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index 9f8282b..136b343 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -393,12 +393,11 @@ template <typename ManagedTokenSource>
AST::Crate
Parser<ManagedTokenSource>::parse_crate ()
{
- /* TODO: determine if has utf8bom and shebang. Currently, they are eliminated
- * by the lexing phase. Neither are useful for the compiler anyway, so maybe a
+ /* TODO: determine if has utf8bom. Currently, it is eliminated
+ * by the lexing phase. It is not useful for the compiler anyway, so maybe a
* better idea would be to eliminate
- * the has_utf8bom and has_shebang variables from the crate data structure. */
+ * the has_utf8bom variable from the crate data structure. */
bool has_utf8bom = false;
- bool has_shebang = false;
// parse inner attributes
AST::AttrVec inner_attrs = parse_inner_attributes ();
@@ -430,8 +429,7 @@ Parser<ManagedTokenSource>::parse_crate ()
for (const auto &error : error_table)
error.emit_error ();
- return AST::Crate (std::move (items), std::move (inner_attrs), has_utf8bom,
- has_shebang);
+ return AST::Crate (std::move (items), std::move (inner_attrs), has_utf8bom);
}
// Parse a contiguous block of inner attributes.
@@ -484,7 +482,7 @@ Parser<ManagedTokenSource>::parse_inner_attribute ()
if (lexer.peek_token ()->get_id () != EXCLAM)
{
Error error (lexer.peek_token ()->get_locus (),
- "expected %<!%> or %<[%> for inner attribute or shebang");
+ "expected %<!%> or %<[%> for inner attribute");
add_error (std::move (error));
return AST::Attribute::create_empty ();
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang.rs b/gcc/testsuite/rust/compile/torture/not_shebang.rs
new file mode 100644
index 0000000..37e01b6
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang.rs
@@ -0,0 +1,3 @@
+#!
+[allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
new file mode 100644
index 0000000..662f650
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs
@@ -0,0 +1 @@
+#!/*/this/is/a/comment*/[allow(unused)] fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
new file mode 100644
index 0000000..273ae4e
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs
@@ -0,0 +1,3 @@
+#!//this/is/a/comment
+[allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
new file mode 100644
index 0000000..86800b1
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs
@@ -0,0 +1,7 @@
+#!//this/is/a/comment
+
+/* Also a /* nested */
+ multiline // comment
+ with some more whitespace after, but then finally a [, so not a real #! line. */
+
+[allow(unused)] fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
new file mode 100644
index 0000000..6b94a691
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs
@@ -0,0 +1,6 @@
+#!
+
+ [allow(unused)]
+
+ fn main () { }
+
diff --git a/gcc/testsuite/rust/compile/torture/shebang.rs b/gcc/testsuite/rust/compile/torture/shebang.rs
new file mode 100755
index 0000000..1c8b9c9
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang.rs
@@ -0,0 +1,3 @@
+#!/usr/bin/env cat
+
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
new file mode 100755
index 0000000..075bc6c
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs
@@ -0,0 +1,3 @@
+#!/usr/bin/env cat
+#![allow(unused)]
+fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
new file mode 100755
index 0000000..ece8a52
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs
@@ -0,0 +1,3 @@
+#!//usr/bin/env cat
+#![allow(unused)]
+fn main () { }