diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2021-07-12 11:10:07 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-12 11:10:07 +0000 |
commit | 9415a4b712371706d7f130695fd959fddb6506cc (patch) | |
tree | 8a82f15833bf654a6ba2b1c13ab1c9b5cf5edf67 /gcc | |
parent | 7cecfe983b389fb05b339ea9be71ade9e016dbb9 (diff) | |
parent | 876f70e905ac30e9517bd99d8ed898a16191c1c5 (diff) | |
download | gcc-9415a4b712371706d7f130695fd959fddb6506cc.zip gcc-9415a4b712371706d7f130695fd959fddb6506cc.tar.gz gcc-9415a4b712371706d7f130695fd959fddb6506cc.tar.bz2 |
Merge #561
561: Handle doc comments strings r=philberty a=philberty
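This adds support for inner (`//!`, `/*!`) and outer (`///`, `/**`) doc comments: the lexer now emits them as doc comment tokens and the parser turns each one into a `doc` attribute.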
Co-authored-by: Mark Wielaard <mark@klomp.org>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/ast/rust-ast.h | 2 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 214 | ||||
-rw-r--r-- | gcc/rust/lex/rust-token.h | 25 | ||||
-rw-r--r-- | gcc/rust/parse/rust-parse-impl.h | 60 | ||||
-rw-r--r-- | gcc/rust/parse/rust-parse.h | 1 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/bad_inner_doc.rs | 15 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/doc_isolated_cr_block_comment.rs | 3 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/doc_isolated_cr_inner_block_comment.rs | 5 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/doc_isolated_cr_inner_line_comment.rs | 5 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/doc_isolated_cr_line_comment.rs | 3 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks.rs | 47 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks_crlf.rs | 47 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/isolated_cr_block_comment.rs | 2 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/isolated_cr_line_comment.rs | 2 |
14 files changed, 401 insertions, 30 deletions
diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index 75b08f8..3e3e185b 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -455,6 +455,8 @@ public: // Returns whether the attribute is considered an "empty" attribute. bool is_empty () const { return attr_input == nullptr && path.is_empty (); } + Location get_locus () const { return locus; } + /* e.g.: #![crate_type = "lib"] #[test] diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 617dd69..0b8a8ea 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -265,9 +265,16 @@ Lexer::build_token () int next_char = peek_input (n); if (is_whitespace (next_char)) n++; - else if (next_char == '/' && peek_input (n + 1) == '/') + else if ((next_char == '/' && peek_input (n + 1) == '/' + && peek_input (n + 2) != '!' + && peek_input (n + 2) != '/') + || (next_char == '/' && peek_input (n + 1) == '/' + && peek_input (n + 2) == '/' + && peek_input (n + 3) == '/')) { + // two // or four //// // A single line comment + // (but not an inner or outer doc comment) n += 2; next_char = peek_input (n); while (next_char != '\n' && next_char != EOF) @@ -278,9 +285,30 @@ Lexer::build_token () if (next_char == '\n') n++; } - else if (next_char == '/' && peek_input (n + 1) == '*') + else if (next_char == '/' && peek_input (n + 1) == '*' + && peek_input (n + 2) == '*' + && peek_input (n + 3) == '/') { + /**/ + n += 4; + } + else if (next_char == '/' && peek_input (n + 1) == '*' + && peek_input (n + 2) == '*' && peek_input (n + 3) == '*' + && peek_input (n + 4) == '/') + { + /***/ + n += 5; + } + else if ((next_char == '/' && peek_input (n + 1) == '*' + && peek_input (n + 2) != '*' + && peek_input (n + 2) != '!') + || (next_char == '/' && peek_input (n + 1) == '*' + && peek_input (n + 2) == '*' + && peek_input (n + 3) == '*')) + { + // one /* or three /*** // Start of a block comment + // (but not an inner or outer doc comment) n += 2; int level = 1; while (level > 0) @@ -339,6 
+367,9 @@ Lexer::build_token () // tell line_table that new line starts line_map->start_line (current_line, max_column_hint); continue; + case '\r': // cr + // Ignore, we expect a newline (lf) soon. + continue; case ' ': // space current_column++; continue; @@ -445,11 +476,14 @@ Lexer::build_token () return Token::make (DIV_EQ, loc); } - else if (peek_input () == '/') + else if ((peek_input () == '/' && peek_input (1) != '!' + && peek_input (1) != '/') + || (peek_input () == '/' && peek_input (1) == '/' + && peek_input (2) == '/')) { - // TODO: single-line doc comments - + // two // or four //// // single line comment + // (but not an inner or outer doc comment) skip_input (); current_column += 2; @@ -461,23 +495,85 @@ Lexer::build_token () current_char = peek_input (); } continue; - break; } - else if (peek_input () == '*') + else if (peek_input () == '/' + && (peek_input (1) == '!' || peek_input (1) == '/')) { + /* single line doc comment, inner or outer. */ + bool is_inner = peek_input (1) == '!'; + skip_input (1); + current_column += 3; + + std::string str; + str.reserve (32); + current_char = peek_input (); + while (current_char != '\n') + { + skip_input (); + if (current_char == '\r') + { + char next_char = peek_input (); + if (next_char == '\n') + { + current_char = '\n'; + break; + } + rust_error_at ( + loc, "Isolated CR %<\\r%> not allowed in doc comment"); + current_char = next_char; + continue; + } + if (current_char == EOF) + { + rust_error_at ( + loc, "unexpected EOF while looking for end of comment"); + break; + } + str += current_char; + current_char = peek_input (); + } + skip_input (); + current_line++; + current_column = 1; + // tell line_table that new line starts + line_map->start_line (current_line, max_column_hint); + + str.shrink_to_fit (); + if (is_inner) + return Token::make_inner_doc_comment (loc, std::move (str)); + else + return Token::make_outer_doc_comment (loc, std::move (str)); + } + else if (peek_input () == '*' && peek_input (1) == 
'*' + && peek_input (2) == '/') + { + /**/ + skip_input (2); + current_column += 4; + continue; + } + else if (peek_input () == '*' && peek_input (1) == '*' + && peek_input (2) == '*' && peek_input (3) == '/') + { + /***/ + skip_input (3); + current_column += 5; + continue; + } + else if ((peek_input () == '*' && peek_input (1) != '!' + && peek_input (1) != '*') + || (peek_input () == '*' && peek_input (1) == '*' + && peek_input (2) == '*')) + { + // one /* or three /*** // block comment + // (but not an inner or outer doc comment) skip_input (); current_column += 2; - // TODO: block doc comments - - current_char = peek_input (); - int level = 1; while (level > 0) { - skip_input (); - current_column++; // for error-handling current_char = peek_input (); if (current_char == EOF) @@ -496,6 +592,7 @@ Lexer::build_token () current_column += 2; level += 1; + continue; } // ignore until */ is found @@ -505,16 +602,101 @@ Lexer::build_token () skip_input (1); current_column += 2; - // should only break inner loop here - seems to do so - // break; level -= 1; + continue; } + + if (current_char == '\n') + { + skip_input (); + current_line++; + current_column = 1; + // tell line_table that new line starts + line_map->start_line (current_line, max_column_hint); + continue; + } + + skip_input (); + current_column++; } // refresh new token continue; - break; + } + else if (peek_input () == '*' + && (peek_input (1) == '!' || peek_input (1) == '*')) + { + // block doc comment, inner /*! 
or outer /** + bool is_inner = peek_input (1) == '!'; + skip_input (1); + current_column += 3; + + std::string str; + str.reserve (96); + + int level = 1; + while (level > 0) + { + current_char = peek_input (); + + if (current_char == EOF) + { + rust_error_at ( + loc, "unexpected EOF while looking for end of comment"); + break; + } + + // if /* found + if (current_char == '/' && peek_input (1) == '*') + { + // skip /* characters + skip_input (1); + current_column += 2; + + level += 1; + str += "/*"; + continue; + } + + // ignore until */ is found + if (current_char == '*' && peek_input (1) == '/') + { + // skip */ characters + skip_input (1); + current_column += 2; + + level -= 1; + if (level > 0) + str += "*/"; + continue; + } + + if (current_char == '\r' && peek_input (1) != '\n') + rust_error_at ( + loc, "Isolated CR %<\\r%> not allowed in doc comment"); + + if (current_char == '\n') + { + skip_input (); + current_line++; + current_column = 1; + // tell line_table that new line starts + line_map->start_line (current_line, max_column_hint); + str += '\n'; + continue; + } + + str += current_char; + skip_input (); + current_column++; + } + + str.shrink_to_fit (); + if (is_inner) + return Token::make_inner_doc_comment (loc, std::move (str)); + else + return Token::make_outer_doc_comment (loc, std::move (str)); } else { diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h index 7719101..1c397c8 100644 --- a/gcc/rust/lex/rust-token.h +++ b/gcc/rust/lex/rust-token.h @@ -151,15 +151,10 @@ enum PrimitiveCoreType RS_TOKEN (RIGHT_SQUARE, "]") \ /* Macros */ \ RS_TOKEN (DOLLAR_SIGN, "$") \ - /* Comments */ \ - RS_TOKEN (LINE_COMMENT, "//") \ - RS_TOKEN (INNER_LINE_DOC, "//!") \ - RS_TOKEN (OUTER_LINE_DOC, "///") \ - RS_TOKEN (BLOCK_COMMENT_START, "/*") \ - RS_TOKEN (BLOCK_COMMENT_END, "*/") \ - RS_TOKEN (INNER_BLOCK_DOC_START, "/*!") \ - RS_TOKEN (OUTER_BLOCK_DOC_START, \ - "/**") /* have "weak" union and 'static keywords? 
*/ \ + /* Doc Comments */ \ + RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \ + RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \ + /* have "weak" union and 'static keywords? */ \ \ RS_TOKEN_KEYWORD (ABSTRACT, "abstract") /* unused */ \ RS_TOKEN_KEYWORD (AS, "as") \ @@ -368,6 +363,18 @@ public: return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str))); } + // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT. + static TokenPtr make_inner_doc_comment (Location locus, std::string &&str) + { + return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str))); + } + + // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT. + static TokenPtr make_outer_doc_comment (Location locus, std::string &&str) + { + return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str))); + } + // Makes and returns a new TokenPtr of type LIFETIME. static TokenPtr make_lifetime (Location locus, std::string &&str) { diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index a8597fa..eedc76d 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -434,8 +434,9 @@ Parser<ManagedTokenSource>::parse_inner_attributes () AST::AttrVec inner_attributes; // only try to parse it if it starts with "#!" 
not only "#" - while (lexer.peek_token ()->get_id () == HASH - && lexer.peek_token (1)->get_id () == EXCLAM) + while ((lexer.peek_token ()->get_id () == HASH + && lexer.peek_token (1)->get_id () == EXCLAM) + || lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) { AST::Attribute inner_attr = parse_inner_attribute (); @@ -457,11 +458,33 @@ Parser<ManagedTokenSource>::parse_inner_attributes () return inner_attributes; } +// Parse a inner or outer doc comment into an doc attribute +template <typename ManagedTokenSource> +AST::Attribute +Parser<ManagedTokenSource>::parse_doc_comment () +{ + const_TokenPtr token = lexer.peek_token (); + Location locus = token->get_locus (); + AST::SimplePathSegment segment ("doc", locus); + std::vector<AST::SimplePathSegment> segments; + segments.push_back (std::move (segment)); + AST::SimplePath attr_path (std::move (segments), false, locus); + AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING, + PrimitiveCoreType::CORETYPE_STR, {}, locus); + std::unique_ptr<AST::AttrInput> attr_input ( + new AST::AttrInputLiteral (std::move (lit_expr))); + lexer.skip_token (); + return AST::Attribute (std::move (attr_path), std::move (attr_input), locus); +} + // Parse a single inner attribute. 
template <typename ManagedTokenSource> AST::Attribute Parser<ManagedTokenSource>::parse_inner_attribute () { + if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) + return parse_doc_comment (); + if (lexer.peek_token ()->get_id () != HASH) { Error error (lexer.peek_token ()->get_locus (), @@ -1019,7 +1042,15 @@ Parser<ManagedTokenSource>::parse_item (bool called_from_statement) switch (t->get_id ()) { case END_OF_FILE: - // not necessarily an error + // not necessarily an error, unless we just read outer + // attributes which needs to be attached + if (!outer_attrs.empty ()) + { + Rust::AST::Attribute attr = outer_attrs.back (); + Error error (attr.get_locus (), + "expected item after outer attribute or doc comment"); + add_error (std::move (error)); + } return nullptr; case PUB: case MOD: @@ -1091,7 +1122,11 @@ Parser<ManagedTokenSource>::parse_outer_attributes () { AST::AttrVec outer_attributes; - while (lexer.peek_token ()->get_id () == HASH) + while (lexer.peek_token ()->get_id () + == HASH /* Can also be #!, which catches errors. */ + || lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT + || lexer.peek_token ()->get_id () + == INNER_DOC_COMMENT) /* For error handling. 
*/ { AST::Attribute outer_attr = parse_outer_attribute (); @@ -1121,6 +1156,20 @@ template <typename ManagedTokenSource> AST::Attribute Parser<ManagedTokenSource>::parse_outer_attribute () { + if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT) + return parse_doc_comment (); + + if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) + { + Error error ( + lexer.peek_token ()->get_locus (), + "inner doc (%<//!%> or %</*!%>) only allowed at start of item " + "and before any outer attribute or doc (%<#[%>, %<///%> or %</**%>)"); + add_error (std::move (error)); + lexer.skip_token (); + return AST::Attribute::create_empty (); + } + /* OuterAttribute -> '#' '[' Attr ']' */ if (lexer.peek_token ()->get_id () != HASH) @@ -1134,12 +1183,13 @@ Parser<ManagedTokenSource>::parse_outer_attribute () if (id == EXCLAM) { // this is inner attribute syntax, so throw error + // inner attributes were either already parsed or not allowed here. Error error ( lexer.peek_token ()->get_locus (), "token %<!%> found, indicating inner attribute definition. Inner " "attributes are not possible at this location"); add_error (std::move (error)); - } // TODO: are there any cases where this wouldn't be an error? + } return AST::Attribute::create_empty (); } diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index bde2613..1cd85ea 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -107,6 +107,7 @@ private: AST::Attribute parse_outer_attribute (); AST::Attribute parse_attribute_body (); std::unique_ptr<AST::AttrInput> parse_attr_input (); + AST::Attribute parse_doc_comment (); // Path-related AST::SimplePath parse_simple_path (); diff --git a/gcc/testsuite/rust/compile/bad_inner_doc.rs b/gcc/testsuite/rust/compile/bad_inner_doc.rs new file mode 100644 index 0000000..cfd166c --- /dev/null +++ b/gcc/testsuite/rust/compile/bad_inner_doc.rs @@ -0,0 +1,15 @@ +pub fn main () +{ + //! 
inner doc allowed + let _x = 42; + // { dg-error "inner doc" "" { target *-*-* } .+1 } + //! inner doc disallowed + mod module + { + /*! inner doc allowed */ + /// outer doc allowed + // { dg-error "inner doc" "" { target *-*-* } .+1 } + /*! but inner doc not here */ + mod x { } + } +} diff --git a/gcc/testsuite/rust/compile/doc_isolated_cr_block_comment.rs b/gcc/testsuite/rust/compile/doc_isolated_cr_block_comment.rs new file mode 100644 index 0000000..0ada77f --- /dev/null +++ b/gcc/testsuite/rust/compile/doc_isolated_cr_block_comment.rs @@ -0,0 +1,3 @@ +// { dg-error "Isolated CR" "" { target *-*-* } .+1 } +/** doc cr
comment */ +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/doc_isolated_cr_inner_block_comment.rs b/gcc/testsuite/rust/compile/doc_isolated_cr_inner_block_comment.rs new file mode 100644 index 0000000..7db3534 --- /dev/null +++ b/gcc/testsuite/rust/compile/doc_isolated_cr_inner_block_comment.rs @@ -0,0 +1,5 @@ +pub fn main () +{ +// { dg-error "Isolated CR" "" { target *-*-* } .+1 } + /*! doc cr
comment */ +} diff --git a/gcc/testsuite/rust/compile/doc_isolated_cr_inner_line_comment.rs b/gcc/testsuite/rust/compile/doc_isolated_cr_inner_line_comment.rs new file mode 100644 index 0000000..d75da75 --- /dev/null +++ b/gcc/testsuite/rust/compile/doc_isolated_cr_inner_line_comment.rs @@ -0,0 +1,5 @@ +pub fn main () +{ +// { dg-error "Isolated CR" "" { target *-*-* } .+1 } + //! doc cr
comment +} diff --git a/gcc/testsuite/rust/compile/doc_isolated_cr_line_comment.rs b/gcc/testsuite/rust/compile/doc_isolated_cr_line_comment.rs new file mode 100644 index 0000000..7b6ef98 --- /dev/null +++ b/gcc/testsuite/rust/compile/doc_isolated_cr_line_comment.rs @@ -0,0 +1,3 @@ +// { dg-error "Isolated CR" "" { target *-*-* } .+1 } +/// doc cr
comment +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks.rs b/gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks.rs new file mode 100644 index 0000000..ab38ac6 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks.rs @@ -0,0 +1,47 @@ +// comment line not a doc +/* comment block not a doc */ + +//! inner line comment for most outer crate +/*! inner block comment for most outer crate */ + +// comment line not a doc +/* comment block not a doc */ + +/// outer doc line for module +/** outer doc block for module */ +pub mod module +{ + //! inner line doc + //!! inner line doc! + /*! inner block doc */ + /*!! inner block doc! */ + + // line comment + /// outer line doc + //// line comment + + /* block comment */ + /** outer block doc */ + /*** block comment */ + + mod block_doc_comments + { + /* /* */ /** */ /*! */ */ + /*! /* */ /** */ /*! */ */ + /** /* */ /** */ /*! */ */ + mod item { } + } + + pub mod empty + { + //! + /*!*/ + // + + /// + mod doc { } + /**/ + /***/ + } +} +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks_crlf.rs b/gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks_crlf.rs new file mode 100644 index 0000000..3ea2cd0 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/all_doc_comment_line_blocks_crlf.rs @@ -0,0 +1,47 @@ +// comment line not a doc
+/* comment block not a doc */
+
+//! inner line comment for most outer crate
+/*! inner block comment for most outer crate */
+
+// comment line not a doc
+/* comment block not a doc */
+
+/// outer doc line for module
+/** outer doc block for module */
+pub mod module
+{
+ //! inner line doc
+ //!! inner line doc!
+ /*! inner block doc */
+ /*!! inner block doc! */
+
+ // line comment
+ /// outer line doc
+ //// line comment
+
+ /* block comment */
+ /** outer block doc */
+ /*** block comment */
+
+ mod block_doc_comments
+ {
+ /* /* */ /** */ /*! */ */
+ /*! /* */ /** */ /*! */ */
+ /** /* */ /** */ /*! */ */
+ mod item { }
+ }
+
+ pub mod empty
+ {
+ //!
+ /*!*/
+ //
+
+ ///
+ mod doc { }
+ /**/
+ /***/
+ }
+}
+pub fn main () { }
diff --git a/gcc/testsuite/rust/compile/torture/isolated_cr_block_comment.rs b/gcc/testsuite/rust/compile/torture/isolated_cr_block_comment.rs new file mode 100644 index 0000000..9a1e090 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/isolated_cr_block_comment.rs @@ -0,0 +1,2 @@ +/* comment cr
is allowed */ +pub fn main () { } diff --git a/gcc/testsuite/rust/compile/torture/isolated_cr_line_comment.rs b/gcc/testsuite/rust/compile/torture/isolated_cr_line_comment.rs new file mode 100644 index 0000000..4e921a2 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/isolated_cr_line_comment.rs @@ -0,0 +1,2 @@ +// comment cr
is allowed +pub fn main () { } |