diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2021-07-05 14:49:56 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-05 14:49:56 +0000 |
commit | 27b3d34428801397e562b7fcc5ca10b13961f3e1 (patch) | |
tree | c88d5c958ebb9f5a80d2ab7945d6cf0f29181553 /gcc | |
parent | 3c7c2f427c8cc69c67f53281391f954df212914a (diff) | |
parent | afe6eb7d16b1c2336381c74e7d416c57129e88c6 (diff) | |
download | gcc-27b3d34428801397e562b7fcc5ca10b13961f3e1.zip gcc-27b3d34428801397e562b7fcc5ca10b13961f3e1.tar.gz gcc-27b3d34428801397e562b7fcc5ca10b13961f3e1.tar.bz2 |
Merge #546
546: shebang handling r=philberty a=dkm
Mark Wielaard:
> Shebang handling (the first line starting with #!) was not done fully
> correctly, and it isn't necessary to keep track of the shebang line in
> the AST or HIR Crate classes.
>
> Because an inner attribute also starts with #!, the first line isn't
> regarded as a shebang line if the #! is followed by (optional)
> whitespace and comments and a [. In that case the #! is seen as the
> start of an inner attribute.
>
> I added various testcases that hopefully show the funny things you can
> get when the first line starts with #!.
Co-authored-by: Mark Wielaard <mark@klomp.org>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/ast/rust-ast-full-test.cc | 5 | ||||
-rw-r--r-- | gcc/rust/ast/rust-ast.h | 13 | ||||
-rw-r--r-- | gcc/rust/hir/rust-ast-lower.cc | 3 | ||||
-rw-r--r-- | gcc/rust/hir/tree/rust-hir-full-test.cc | 6 | ||||
-rw-r--r-- | gcc/rust/hir/tree/rust-hir.h | 14 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 79 | ||||
-rw-r--r-- | gcc/rust/parse/rust-parse-impl.h | 12 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/not_shebang.rs | 3 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs | 1 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/not_shebang_comment.rs | 3 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs | 7 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs | 6 | ||||
-rwxr-xr-x | gcc/testsuite/rust/compile/torture/shebang.rs | 3 | ||||
-rwxr-xr-x | gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs | 3 | ||||
-rwxr-xr-x | gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs | 3 |
15 files changed, 107 insertions, 54 deletions
diff --git a/gcc/rust/ast/rust-ast-full-test.cc b/gcc/rust/ast/rust-ast-full-test.cc index 3d339ad1..12ef255 100644 --- a/gcc/rust/ast/rust-ast-full-test.cc +++ b/gcc/rust/ast/rust-ast-full-test.cc @@ -172,13 +172,10 @@ Crate::as_string () const rust_debug ("beginning crate recursive as-string"); std::string str ("Crate: "); - // add utf8bom and shebang + // add utf8bom if (has_utf8bom) str += "\n has utf8bom"; - if (has_shebang) - str += "\n has shebang"; - // inner attributes str += append_attributes (inner_attrs, INNER); diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index 0e25de2..ce55e1b 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -1551,7 +1551,6 @@ protected: struct Crate { bool has_utf8bom; - bool has_shebang; std::vector<Attribute> inner_attrs; // dodgy spacing required here @@ -1564,17 +1563,16 @@ struct Crate public: // Constructor Crate (std::vector<std::unique_ptr<Item> > items, - std::vector<Attribute> inner_attrs, bool has_utf8bom = false, - bool has_shebang = false) - : has_utf8bom (has_utf8bom), has_shebang (has_shebang), - inner_attrs (std::move (inner_attrs)), items (std::move (items)), + std::vector<Attribute> inner_attrs, bool has_utf8bom = false) + : has_utf8bom (has_utf8bom), inner_attrs (std::move (inner_attrs)), + items (std::move (items)), node_id (Analysis::Mappings::get ()->get_next_node_id ()) {} // Copy constructor with vector clone Crate (Crate const &other) - : has_utf8bom (other.has_utf8bom), has_shebang (other.has_shebang), - inner_attrs (other.inner_attrs), node_id (other.node_id) + : has_utf8bom (other.has_utf8bom), inner_attrs (other.inner_attrs), + node_id (other.node_id) { items.reserve (other.items.size ()); for (const auto &e : other.items) @@ -1587,7 +1585,6 @@ public: Crate &operator= (Crate const &other) { inner_attrs = other.inner_attrs; - has_shebang = other.has_shebang; has_utf8bom = other.has_utf8bom; node_id = other.node_id; diff --git a/gcc/rust/hir/rust-ast-lower.cc 
b/gcc/rust/hir/rust-ast-lower.cc index c7222e2..0f3c86d 100644 --- a/gcc/rust/hir/rust-ast-lower.cc +++ b/gcc/rust/hir/rust-ast-lower.cc @@ -41,7 +41,6 @@ ASTLowering::go () { std::vector<std::unique_ptr<HIR::Item> > items; bool has_utf8bom = false; - bool has_shebang = false; for (auto it = astCrate.items.begin (); it != astCrate.items.end (); it++) { @@ -57,7 +56,7 @@ ASTLowering::go () UNKNOWN_LOCAL_DEFID); return HIR::Crate (std::move (items), astCrate.get_inner_attrs (), mapping, - has_utf8bom, has_shebang); + has_utf8bom); } // rust-ast-lower-block.h diff --git a/gcc/rust/hir/tree/rust-hir-full-test.cc b/gcc/rust/hir/tree/rust-hir-full-test.cc index 261b3af..051ba87 100644 --- a/gcc/rust/hir/tree/rust-hir-full-test.cc +++ b/gcc/rust/hir/tree/rust-hir-full-test.cc @@ -73,15 +73,11 @@ std::string Crate::as_string () const { std::string str ("HIR::Crate: "); - // add utf8bom and shebang + // add utf8bom if (has_utf8bom) { str += "\n has utf8bom"; } - if (has_shebang) - { - str += "\n has shebang"; - } // inner attributes str += "\n inner attributes: "; diff --git a/gcc/rust/hir/tree/rust-hir.h b/gcc/rust/hir/tree/rust-hir.h index 35dc71a..f918f2d 100644 --- a/gcc/rust/hir/tree/rust-hir.h +++ b/gcc/rust/hir/tree/rust-hir.h @@ -679,7 +679,6 @@ public: struct Crate { bool has_utf8bom; - bool has_shebang; AST::AttrVec inner_attrs; // dodgy spacing required here @@ -692,17 +691,15 @@ struct Crate public: // Constructor Crate (std::vector<std::unique_ptr<Item> > items, AST::AttrVec inner_attrs, - Analysis::NodeMapping mappings, bool has_utf8bom = false, - bool has_shebang = false) - : has_utf8bom (has_utf8bom), has_shebang (has_shebang), - inner_attrs (std::move (inner_attrs)), items (std::move (items)), - mappings (mappings) + Analysis::NodeMapping mappings, bool has_utf8bom = false) + : has_utf8bom (has_utf8bom), inner_attrs (std::move (inner_attrs)), + items (std::move (items)), mappings (mappings) {} // Copy constructor with vector clone Crate (Crate const &other) 
- : has_utf8bom (other.has_utf8bom), has_shebang (other.has_shebang), - inner_attrs (other.inner_attrs), mappings (other.mappings) + : has_utf8bom (other.has_utf8bom), inner_attrs (other.inner_attrs), + mappings (other.mappings) { items.reserve (other.items.size ()); for (const auto &e : other.items) @@ -715,7 +712,6 @@ public: Crate &operator= (Crate const &other) { inner_attrs = other.inner_attrs; - has_shebang = other.has_shebang; has_utf8bom = other.has_utf8bom; mappings = other.mappings; diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index d138416..ebd69de 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -237,28 +237,63 @@ Lexer::build_token () current_char = peek_input (); skip_input (); - // return end of file token if end of file - if (current_char == EOF) - return Token::make (END_OF_FILE, loc); - // detect shebang - if (loc == 1 && current_line == 1 && current_char == '#') + // Must be the first thing on the first line, starting with #! + // But since an attribute can also start with an #! we don't count it as a + // shebang line when after any whitespace or comments there is a [. If it + // is a shebang line we simple drop the line. Otherwise we don't consume + // any characters and fall through to the real tokenizer. 
+ if (current_line == 1 && current_column == 1 && current_char == '#' + && peek_input () == '!') { - current_char = peek_input (); - - if (current_char == '!') + int n = 1; + while (true) { - skip_input (); - current_char = peek_input (); - - if (current_char == '/') + int next_char = peek_input (n); + if (is_whitespace (next_char)) + n++; + else if (next_char == '/' && peek_input (n + 1) == '/') { - // definitely shebang - - skip_input (); - - // ignore rest of line - while (current_char != '\n') + // A single line comment + n += 2; + next_char = peek_input (n); + while (next_char != '\n' && next_char != EOF) + { + n++; + next_char = peek_input (n); + } + if (next_char == '\n') + n++; + } + else if (next_char == '/' && peek_input (n + 1) == '*') + { + // Start of a block comment + n += 2; + int level = 1; + while (level > 0) + { + if (peek_input (n) == EOF) + break; + else if (peek_input (n) == '/' + && peek_input (n + 1) == '*') + { + n += 2; + level += 1; + } + else if (peek_input (n) == '*' + && peek_input (n + 1) == '/') + { + n += 2; + level -= 1; + } + else + n++; + } + } + else if (next_char != '[') + { + // definitely shebang, ignore the first line + while (current_char != '\n' && current_char != EOF) { current_char = peek_input (); skip_input (); @@ -269,11 +304,17 @@ Lexer::build_token () current_column = 1; // tell line_table that new line starts line_map->start_line (current_line, max_column_hint); - continue; + break; } + else + break; /* Definitely not a shebang line. 
*/ } } + // return end of file token if end of file + if (current_char == EOF) + return Token::make (END_OF_FILE, loc); + // if not end of file, start tokenising switch (current_char) { diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index 9f8282b..136b343 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -393,12 +393,11 @@ template <typename ManagedTokenSource> AST::Crate Parser<ManagedTokenSource>::parse_crate () { - /* TODO: determine if has utf8bom and shebang. Currently, they are eliminated - * by the lexing phase. Neither are useful for the compiler anyway, so maybe a + /* TODO: determine if has utf8bom. Currently, is eliminated + * by the lexing phase. Not useful for the compiler anyway, so maybe a * better idea would be to eliminate - * the has_utf8bom and has_shebang variables from the crate data structure. */ + * the has_utf8bom variable from the crate data structure. */ bool has_utf8bom = false; - bool has_shebang = false; // parse inner attributes AST::AttrVec inner_attrs = parse_inner_attributes (); @@ -430,8 +429,7 @@ Parser<ManagedTokenSource>::parse_crate () for (const auto &error : error_table) error.emit_error (); - return AST::Crate (std::move (items), std::move (inner_attrs), has_utf8bom, - has_shebang); + return AST::Crate (std::move (items), std::move (inner_attrs), has_utf8bom); } // Parse a contiguous block of inner attributes. 
@@ -484,7 +482,7 @@ Parser<ManagedTokenSource>::parse_inner_attribute () if (lexer.peek_token ()->get_id () != EXCLAM) { Error error (lexer.peek_token ()->get_locus (), - "expected %<!%> or %<[%> for inner attribute or shebang"); + "expected %<!%> or %<[%> for inner attribute"); add_error (std::move (error)); return AST::Attribute::create_empty (); diff --git a/gcc/testsuite/rust/compile/torture/not_shebang.rs b/gcc/testsuite/rust/compile/torture/not_shebang.rs new file mode 100644 index 0000000..37e01b6 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang.rs @@ -0,0 +1,3 @@ +#! +[allow(unused)] +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs new file mode 100644 index 0000000..662f650 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_block_comment.rs @@ -0,0 +1 @@ +#!/*/this/is/a/comment*/[allow(unused)] fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs new file mode 100644 index 0000000..273ae4e --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_comment.rs @@ -0,0 +1,3 @@ +#!//this/is/a/comment +[allow(unused)] +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs new file mode 100644 index 0000000..86800b1 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_multiline_comment.rs @@ -0,0 +1,7 @@ +#!//this/is/a/comment + +/* Also a /* nested */ + multiline // comment + with some more whitespace after, but then finally a [, so not a real #! line. 
*/ + +[allow(unused)] fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs new file mode 100644 index 0000000..6b94a691 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/not_shebang_spaces.rs @@ -0,0 +1,6 @@ +#! + + [allow(unused)] + + fn main () { } + diff --git a/gcc/testsuite/rust/compile/torture/shebang.rs b/gcc/testsuite/rust/compile/torture/shebang.rs new file mode 100755 index 0000000..1c8b9c9 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/shebang.rs @@ -0,0 +1,3 @@ +#!/usr/bin/env cat + +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs new file mode 100755 index 0000000..075bc6c --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr.rs @@ -0,0 +1,3 @@ +#!/usr/bin/env cat +#![allow(unused)] +fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs new file mode 100755 index 0000000..ece8a52 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/shebang_plus_attr2.rs @@ -0,0 +1,3 @@ +#!//usr/bin/env cat +#![allow(unused)] +fn main () { } |