// Copyright (C) 2020-2026 Free Software Foundation, Inc. // This file is part of GCC. // GCC is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free // Software Foundation; either version 3, or (at your option) any later // version. // GCC is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // You should have received a copy of the GNU General Public License // along with GCC; see the file COPYING3. If not see // . /* Template implementation for Rust::Parser. Previously in rust-parse.cc (before * Parser was template). Separated from rust-parse.h for readability. */ /* DO NOT INCLUDE ANYWHERE - this is automatically included * by rust-parse-impl-*.cc * This is also the reason why there are no include guards. */ #include "expected.h" #include "rust-ast.h" #include "rust-common.h" #include "rust-expr.h" #include "rust-item.h" #include "rust-common.h" #include "rust-parse.h" #include "rust-token.h" #define INCLUDE_ALGORITHM #include "rust-diagnostics.h" #include "rust-dir-owner.h" #include "rust-keyword-values.h" #include "rust-edition.h" #include "rust-parse-error.h" #include "optional.h" namespace Rust { /* HACK-y special handling for skipping a right angle token at the end of * generic arguments. * Currently, this replaces the "current token" with one that is identical * except has the leading '>' removed (e.g. '>>' becomes '>'). This is bad * for several reasons - it modifies the token stream to something that * actually doesn't make syntactic sense, it may not worked if the token * has already been skipped, etc. 
It was done because it would not * actually require inserting new items into the token stream (which I * thought would take more work to not mess up) and because I wasn't sure * if the "already seen right angle" flag in the parser would work * correctly. * Those two other approaches listed are in my opinion actually better * long-term - insertion is probably best as it reflects syntactically * what occurs. On the other hand, I need to do a code audit to make sure * that insertion doesn't mess anything up. So that's a FIXME. */ template bool Parser::skip_generics_right_angle () { /* OK, new great idea. Have a lexer method called * "split_current_token(TokenType newLeft, TokenType newRight)", which is * called here with whatever arguments are appropriate. That lexer method * handles "replacing" the current token with the "newLeft" and "inserting" * the next token with the "newRight" (and creating a location, etc. for it) */ /* HACK: special handling for right shift '>>', greater or equal '>=', and * right shift assig */ // '>>=' const_TokenPtr tok = lexer.peek_token (); switch (tok->get_id ()) { case RIGHT_ANGLE: // this is good - skip token lexer.skip_token (); return true; case RIGHT_SHIFT: { // new implementation that should be better lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE); lexer.skip_token (); return true; } case GREATER_OR_EQUAL: { // new implementation that should be better lexer.split_current_token (RIGHT_ANGLE, EQUAL); lexer.skip_token (); return true; } case RIGHT_SHIFT_EQ: { // new implementation that should be better lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL); lexer.skip_token (); return true; } default: add_error (Error (tok->get_locus (), "expected %<>%> at end of generic argument - found %qs", tok->get_token_description ())); return false; } } /* Gets left binding power for specified token. 
* Not suitable for use at the moment or possibly ever because binding power
 * cannot be purely determined from operator token with Rust grammar - e.g.
 * method call and field access have
 * different left binding powers but the same operator token. */

/* Maps TOKEN to one of the LBP_* constants.
 *
 * DOT is disambiguated by peeking at the two tokens after it: an identifier
 * that is not followed by '(' yields LBP_FIELD_EXPR, anything else yields
 * LBP_METHOD_CALL. Tokens without a case of their own yield LBP_LOWEST. */
template <typename ManagedTokenSource>
int
Parser<ManagedTokenSource>::left_binding_power (const_TokenPtr token)
{
  // HACK: called with "peek_token()", so lookahead is "peek_token(1)"
  switch (token->get_id ())
    {
      /* TODO: issue here - distinguish between method calls and field access
       * somehow? Also would have to distinguish between paths and function
       * calls (:: operator), maybe more stuff. */
      /* Current plan for tackling LBP - don't do it based on token, use
       * lookahead. Or alternatively, only use Pratt parsing for OperatorExpr
       * and handle other expressions without it. rustc only considers
       * arithmetic, logical/relational, 'as',
       * '?=', ranges, colons, and assignment to have operator precedence and
       * associativity rules applicable. It then has
       * a separate "ExprPrecedence" that also includes binary operators. */

      // TODO: handle operator overloading - have a function replace the
      // operator?

      /*case DOT:
          return LBP_DOT;*/

    case SCOPE_RESOLUTION:
      rust_debug (
        "possible error - looked up LBP of scope resolution operator. should "
        "be handled elsewhere.");
      return LBP_PATH;

    /* Resolved by lookahead HACK that should work with current code. If next
     * token is identifier and token after that isn't parenthesised expression
     * list, it is a field reference. */
    case DOT:
      if (lexer.peek_token (1)->get_id () == IDENTIFIER
          && lexer.peek_token (2)->get_id () != LEFT_PAREN)
        {
          return LBP_FIELD_EXPR;
        }
      return LBP_METHOD_CALL;

    case LEFT_PAREN:
      return LBP_FUNCTION_CALL;

    case LEFT_SQUARE:
      return LBP_ARRAY_REF;

    // postfix question mark (i.e. error propagation expression)
    case QUESTION_MARK:
      return LBP_QUESTION_MARK;

    case AS:
      return LBP_AS;

    case ASTERISK:
      return LBP_MUL;
    case DIV:
      return LBP_DIV;
    case PERCENT:
      return LBP_MOD;

    case PLUS:
      return LBP_PLUS;
    case MINUS:
      return LBP_MINUS;

    case LEFT_SHIFT:
      return LBP_L_SHIFT;
    case RIGHT_SHIFT:
      return LBP_R_SHIFT;

    // binary & operator
    case AMP:
      return LBP_AMP;

    // binary ^ operator
    case CARET:
      return LBP_CARET;

    // binary | operator
    case PIPE:
      return LBP_PIPE;

    case EQUAL_EQUAL:
      return LBP_EQUAL;
    case NOT_EQUAL:
      return LBP_NOT_EQUAL;
    case RIGHT_ANGLE:
      return LBP_GREATER_THAN;
    case GREATER_OR_EQUAL:
      return LBP_GREATER_EQUAL;
    case LEFT_ANGLE:
      return LBP_SMALLER_THAN;
    case LESS_OR_EQUAL:
      return LBP_SMALLER_EQUAL;

    case LOGICAL_AND:
      return LBP_LOGICAL_AND;

    case OR:
      return LBP_LOGICAL_OR;

    case DOT_DOT:
      return LBP_DOT_DOT;

    case DOT_DOT_EQ:
      return LBP_DOT_DOT_EQ;

    case EQUAL:
      return LBP_ASSIG;
    case PLUS_EQ:
      return LBP_PLUS_ASSIG;
    case MINUS_EQ:
      return LBP_MINUS_ASSIG;
    case ASTERISK_EQ:
      return LBP_MULT_ASSIG;
    case DIV_EQ:
      return LBP_DIV_ASSIG;
    case PERCENT_EQ:
      return LBP_MOD_ASSIG;
    case AMP_EQ:
      return LBP_AMP_ASSIG;
    case PIPE_EQ:
      return LBP_PIPE_ASSIG;
    case CARET_EQ:
      return LBP_CARET_ASSIG;
    case LEFT_SHIFT_EQ:
      return LBP_L_SHIFT_ASSIG;
    case RIGHT_SHIFT_EQ:
      return LBP_R_SHIFT_ASSIG;

    /* HACK: float literal due to lexer misidentifying a dot then an integer as
     * a float */
    case FLOAT_LITERAL:
      return LBP_FIELD_EXPR;
      // field expr is same as tuple expr in precedence, i imagine
      // TODO: is this needed anymore? lexer shouldn't do that anymore

    // anything that can't appear in an infix position is given lowest priority
    default:
      return LBP_LOWEST;
    }
}

// Returns true when current token is EOF.
template bool Parser::done_end_of_file () { return lexer.peek_token ()->get_id () == END_OF_FILE; } // Parses a sequence of items within a module or the implicit top-level module // in a crate template tl::expected>, Parse::Error::Items> Parser::parse_items () { std::vector> items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != END_OF_FILE) { auto item = parse_item (false); if (!item) return Parse::Error::Items::make_malformed (std::move (items)); items.push_back (std::move (item.value ())); t = lexer.peek_token (); } // GCC 5->7 bug doesn't threat lvalue as an rvalue for the overload #if __GNUC__ <= 7 return std::move (items); #else return items; #endif } // Parses a crate (compilation unit) - entry point template std::unique_ptr Parser::parse_crate () { // parse inner attributes AST::AttrVec inner_attrs = parse_inner_attributes (); // parse items auto items = parse_items ().value_or (std::vector>{}); // emit all errors for (const auto &error : error_table) error.emit (); return std::unique_ptr ( new AST::Crate (std::move (items), std::move (inner_attrs))); } // Parses an identifier/keyword as a Token template tl::expected, Parse::Error::Node> Parser::parse_identifier_or_keyword_token () { const_TokenPtr t = lexer.peek_token (); if (t->get_id () == IDENTIFIER || token_id_is_keyword (t->get_id ())) { lexer.skip_token (); return std::unique_ptr (new AST::Token (std::move (t))); } else { add_error (Error (t->get_locus (), "expected keyword or identifier")); return tl::unexpected (Parse::Error::Node::MALFORMED); } } template bool Parser::is_macro_rules_def (const_TokenPtr t) { auto macro_name = lexer.peek_token (2)->get_id (); bool allowed_macro_name = (macro_name == IDENTIFIER || macro_name == TRY); return t->get_str () == Values::WeakKeywords::MACRO_RULES && lexer.peek_token (1)->get_id () == EXCLAM && allowed_macro_name; } // Parses a single item template tl::expected, Parse::Error::Item> Parser::parse_item (bool called_from_statement) { // has a 
"called_from_statement" parameter for better error message handling // TODO: GCC 5 does not handle implicit return type correctly so we're forced // to specify it almost every time until the baseline GCC gets bumped. // Since this type is quite long and the code is dense we use an alias. // // When support for GCC 5 stops: remove this alias as well as the explicit // ctor calls. using RType = tl::expected, Parse::Error::Item>; // parse outer attributes for item AST::AttrVec outer_attrs = parse_outer_attributes (); const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case END_OF_FILE: // not necessarily an error, unless we just read outer // attributes which needs to be attached if (!outer_attrs.empty ()) { Rust::AST::Attribute attr = outer_attrs.back (); Error error (attr.get_locus (), "expected item after outer attribute or doc comment"); add_error (std::move (error)); } return Parse::Error::Item::make_end_of_file (); case ASYNC: case PUB: case MOD: case EXTERN_KW: case USE: case FN_KW: case TYPE: case STRUCT_KW: case ENUM_KW: case CONST: case STATIC_KW: case AUTO: case TRAIT: case IMPL: case MACRO: /* TODO: implement union keyword but not really because of * context-dependence crappy hack way to parse a union written below to * separate it from the good code. 
*/ // case UNION: case UNSAFE: // maybe - unsafe traits are a thing // if any of these (should be all possible VisItem prefixes), parse a // VisItem { auto vis_item = parse_vis_item (std::move (outer_attrs)); if (!vis_item) return Parse::Error::Item::make_malformed (); return RType{std::move (vis_item)}; } case SUPER: case SELF: case CRATE: case DOLLAR_SIGN: // almost certainly macro invocation semi { auto macro_invoc_semi = parse_macro_invocation_semi (std::move (outer_attrs)); if (!macro_invoc_semi) return Parse::Error::Item::make_malformed (); return RType{std::move (macro_invoc_semi)}; } // crappy hack to do union "keyword" case IDENTIFIER: // TODO: ensure std::string and literal comparison works if (t->get_str () == Values::WeakKeywords::UNION && lexer.peek_token (1)->get_id () == IDENTIFIER) { auto vis_item = parse_vis_item (std::move (outer_attrs)); if (!vis_item) return Parse::Error::Item::make_malformed (); return RType{std::move (vis_item)}; // or should this go straight to parsing union? } else if (t->get_str () == Values::WeakKeywords::DEFAULT && lexer.peek_token (1)->get_id () != EXCLAM) { // parse normal functions with `default` qualifier // they will be rejected in ASTValidation pass return parse_vis_item (std::move (outer_attrs)); } else if (is_macro_rules_def (t)) { // macro_rules! 
macro item auto macro_rule_def = parse_macro_rules_def (std::move (outer_attrs)); if (!macro_rule_def) return Parse::Error::Item::make_malformed (); return RType{std::move (macro_rule_def)}; } else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION || lexer.peek_token (1)->get_id () == EXCLAM) { /* path (probably) or macro invocation, so probably a macro invocation * semi */ auto macro_invocation_semi = parse_macro_invocation_semi (std::move (outer_attrs)); if (!macro_invocation_semi) return Parse::Error::Item::make_malformed (); return RType{std::move (macro_invocation_semi)}; } gcc_fallthrough (); default: // otherwise unrecognised add_error (Error (t->get_locus (), "unrecognised token %qs for start of %s", t->get_token_description (), called_from_statement ? "statement" : "item")); // skip somewhere? return Parse::Error::Item::make_malformed (); break; } } // Parses a VisItem (item that can have non-default visibility). template std::unique_ptr Parser::parse_vis_item (AST::AttrVec outer_attrs) { // parse visibility, which may or may not exist auto vis_res = parse_visibility (); if (!vis_res) return nullptr; auto vis = vis_res.value (); // select VisItem to create depending on keyword const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case MOD: return parse_module (std::move (vis), std::move (outer_attrs)); case EXTERN_KW: // lookahead to resolve syntactical production t = lexer.peek_token (1); switch (t->get_id ()) { case CRATE: return parse_extern_crate (std::move (vis), std::move (outer_attrs)); case FN_KW: // extern function return parse_function (std::move (vis), std::move (outer_attrs)); case LEFT_CURLY: // extern block return parse_extern_block (std::move (vis), std::move (outer_attrs)); case STRING_LITERAL: // for specifying extern ABI // could be extern block or extern function, so more lookahead t = lexer.peek_token (2); switch (t->get_id ()) { case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs)); case 
LEFT_CURLY: return parse_extern_block (std::move (vis), std::move (outer_attrs)); default: add_error ( Error (t->get_locus (), "unexpected token %qs in some sort of extern production", t->get_token_description ())); lexer.skip_token (2); // TODO: is this right thing to do? return nullptr; } default: add_error ( Error (t->get_locus (), "unexpected token %qs in some sort of extern production", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } case USE: return parse_use_decl (std::move (vis), std::move (outer_attrs)); case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs)); case TYPE: return parse_type_alias (std::move (vis), std::move (outer_attrs)); case STRUCT_KW: return parse_struct (std::move (vis), std::move (outer_attrs)); case ENUM_KW: return parse_enum (std::move (vis), std::move (outer_attrs)); // TODO: implement union keyword but not really because of // context-dependence case UNION: crappy hack to do union "keyword" case IDENTIFIER: if (t->get_str () == Values::WeakKeywords::UNION && lexer.peek_token (1)->get_id () == IDENTIFIER) { return parse_union (std::move (vis), std::move (outer_attrs)); // or should item switch go straight to parsing union? 
} else if (t->get_str () == Values::WeakKeywords::DEFAULT) { // parse normal functions with `default` qualifier they will be // rejected in ASTValidation pass return parse_function (std::move (vis), std::move (outer_attrs)); } break; case CONST: // lookahead to resolve syntactical production t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (std::move (vis), std::move (outer_attrs)); case ASYNC: return parse_async_item (std::move (vis), std::move (outer_attrs)); case UNSAFE: case EXTERN_KW: case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs)); default: add_error ( Error (t->get_locus (), "unexpected token %qs in some sort of const production", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } // for async functions case ASYNC: return parse_async_item (std::move (vis), std::move (outer_attrs)); case STATIC_KW: return parse_static_item (std::move (vis), std::move (outer_attrs)); case AUTO: case TRAIT: return parse_trait (std::move (vis), std::move (outer_attrs)); case IMPL: return parse_impl (std::move (vis), std::move (outer_attrs)); case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls), // lookahead to resolve syntactical production t = lexer.peek_token (1); switch (t->get_id ()) { case AUTO: case TRAIT: return parse_trait (std::move (vis), std::move (outer_attrs)); case EXTERN_KW: case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs)); case IMPL: return parse_impl (std::move (vis), std::move (outer_attrs)); case MOD: return parse_module (std::move (vis), std::move (outer_attrs)); default: add_error ( Error (t->get_locus (), "unexpected token %qs in some sort of unsafe production", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? 
return nullptr; } case MACRO: return parse_decl_macro_def (std::move (vis), std::move (outer_attrs)); default: // otherwise vis item clearly doesn't exist, which is not an error // has a catch-all post-switch return to allow other breaks to occur break; } return nullptr; } template std::unique_ptr Parser::parse_async_item (AST::Visibility vis, AST::AttrVec outer_attrs) { auto offset = (lexer.peek_token ()->get_id () == CONST) ? 1 : 0; const_TokenPtr t = lexer.peek_token (offset); if (get_rust_edition () == Edition::E2015) { add_error (Error (t->get_locus (), ErrorCode::E0670, "% is not permitted in Rust 2015")); add_error ( Error::Hint (t->get_locus (), "to use %, switch to Rust 2018 or later")); } t = lexer.peek_token (offset + 1); switch (t->get_id ()) { case UNSAFE: case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs)); default: add_error ( Error (t->get_locus (), "expected item, found keyword %")); lexer.skip_token (1); return nullptr; } } // Parses a macro rules definition syntax extension whatever thing. template std::unique_ptr Parser::parse_macro_rules_def (AST::AttrVec outer_attrs) { // ensure that first token is identifier saying "macro_rules" const_TokenPtr t = lexer.peek_token (); if (t->get_id () != IDENTIFIER || t->get_str () != Values::WeakKeywords::MACRO_RULES) { Error error ( t->get_locus (), "macro rules definition does not start with %"); add_error (std::move (error)); // skip after somewhere? return nullptr; } lexer.skip_token (); location_t macro_locus = t->get_locus (); if (!skip_token (EXCLAM)) { // skip after somewhere? 
return nullptr; } // parse macro name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { return nullptr; } Identifier rule_name{ident_tok}; // DEBUG rust_debug ("in macro rules def, about to parse parens."); // save delim type to ensure it is reused later AST::DelimType delim_type = AST::PARENS; // Map tokens to DelimType t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_PAREN: delim_type = AST::PARENS; break; case LEFT_SQUARE: delim_type = AST::SQUARE; break; case LEFT_CURLY: delim_type = AST::CURLY; break; default: add_error (Error (t->get_locus (), "unexpected token %qs - expecting delimiters (for a " "macro rules definition)", t->get_token_description ())); return nullptr; } lexer.skip_token (); // parse actual macro rules std::vector macro_rules; // must be at least one macro rule, so parse it AST::MacroRule initial_rule = parse_macro_rule (); if (initial_rule.is_error ()) { Error error (lexer.peek_token ()->get_locus (), "required first macro rule in macro rules definition " "could not be parsed"); add_error (std::move (error)); // skip after somewhere? 
return nullptr; } macro_rules.push_back (std::move (initial_rule)); // DEBUG rust_debug ("successfully pushed back initial macro rule"); t = lexer.peek_token (); // parse macro rules while (t->get_id () == SEMICOLON) { // skip semicolon lexer.skip_token (); // don't parse if end of macro rules if (Parse::Utils::token_id_matches_delims (lexer.peek_token ()->get_id (), delim_type)) { // DEBUG rust_debug ( "broke out of parsing macro rules loop due to finding delim"); break; } // try to parse next rule AST::MacroRule rule = parse_macro_rule (); if (rule.is_error ()) { Error error (lexer.peek_token ()->get_locus (), "failed to parse macro rule in macro rules definition"); add_error (std::move (error)); return nullptr; } macro_rules.push_back (std::move (rule)); // DEBUG rust_debug ("successfully pushed back another macro rule"); t = lexer.peek_token (); } // parse end delimiters t = lexer.peek_token (); if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) { // tokens match opening delimiter, so skip. lexer.skip_token (); if (delim_type != AST::CURLY) { // skip semicolon at end of non-curly macro definitions if (!skip_token (SEMICOLON)) { // as this is the end, allow recovery (probably) - may change return std::unique_ptr ( AST::MacroRulesDefinition::mbe ( std::move (rule_name), delim_type, std::move (macro_rules), std::move (outer_attrs), macro_locus)); } } return std::unique_ptr ( AST::MacroRulesDefinition::mbe (std::move (rule_name), delim_type, std::move (macro_rules), std::move (outer_attrs), macro_locus)); } else { // tokens don't match opening delimiters, so produce error Error error (t->get_locus (), "unexpected token %qs - expecting closing delimiter %qs " "(for a macro rules definition)", t->get_token_description (), (delim_type == AST::PARENS ? ")" : (delim_type == AST::SQUARE ? "]" : "}"))); add_error (std::move (error)); /* return empty macro definiton despite possibly parsing mostly valid one * - TODO is this a good idea? 
*/ return nullptr; } } // Parses a declarative macro 2.0 definition. template std::unique_ptr Parser::parse_decl_macro_def (AST::Visibility vis, AST::AttrVec outer_attrs) { // ensure that first token is identifier saying "macro" const_TokenPtr t = lexer.peek_token (); if (t->get_id () != MACRO) { Error error ( t->get_locus (), "declarative macro definition does not start with %"); add_error (std::move (error)); // skip after somewhere? return nullptr; } lexer.skip_token (); location_t macro_locus = t->get_locus (); // parse macro name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { return nullptr; } Identifier rule_name{ident_tok}; t = lexer.peek_token (); if (t->get_id () == LEFT_PAREN) { // single definiton of macro rule // e.g. `macro foo($e:expr) {}` // parse macro matcher location_t locus = lexer.peek_token ()->get_locus (); AST::MacroMatcher matcher = parse_macro_matcher (); if (matcher.is_error ()) return nullptr; // check delimiter of macro matcher if (matcher.get_delim_type () != AST::DelimType::PARENS) { Error error (locus, "only parenthesis can be used for a macro " "matcher in declarative macro definition"); add_error (std::move (error)); return nullptr; } location_t transcriber_loc = lexer.peek_token ()->get_locus (); auto delim_tok_tree = parse_delim_token_tree (); if (!delim_tok_tree) return nullptr; AST::MacroTranscriber transcriber (delim_tok_tree.value (), transcriber_loc); if (transcriber.get_token_tree ().get_delim_type () != AST::DelimType::CURLY) { Error error (transcriber_loc, "only braces can be used for a macro transcriber " "in declarative macro definition"); add_error (std::move (error)); return nullptr; } std::vector macro_rules; macro_rules.emplace_back (std::move (matcher), std::move (transcriber), locus); return std::unique_ptr ( AST::MacroRulesDefinition::decl_macro (std::move (rule_name), macro_rules, std::move (outer_attrs), macro_locus, vis)); } else if (t->get_id () == LEFT_CURLY) { // multiple 
definitions of macro rule separated by comma // e.g. `macro foo { () => {}, ($e:expr) => {}, }` // parse left curly const_TokenPtr left_curly = expect_token (LEFT_CURLY); if (left_curly == nullptr) { return nullptr; } // parse actual macro rules std::vector macro_rules; // must be at least one macro rule, so parse it AST::MacroRule initial_rule = parse_macro_rule (); if (initial_rule.is_error ()) { Error error ( lexer.peek_token ()->get_locus (), "required first macro rule in declarative macro definition " "could not be parsed"); add_error (std::move (error)); // skip after somewhere? return nullptr; } macro_rules.push_back (std::move (initial_rule)); t = lexer.peek_token (); // parse macro rules while (t->get_id () == COMMA) { // skip comma lexer.skip_token (); // don't parse if end of macro rules if (Parse::Utils::token_id_matches_delims ( lexer.peek_token ()->get_id (), AST::CURLY)) { break; } // try to parse next rule AST::MacroRule rule = parse_macro_rule (); if (rule.is_error ()) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse macro rule in declarative macro definition"); add_error (std::move (error)); return nullptr; } macro_rules.push_back (std::move (rule)); t = lexer.peek_token (); } // parse right curly const_TokenPtr right_curly = expect_token (RIGHT_CURLY); if (right_curly == nullptr) { return nullptr; } return std::unique_ptr ( AST::MacroRulesDefinition::decl_macro (std::move (rule_name), std::move (macro_rules), std::move (outer_attrs), macro_locus, vis)); } else { add_error (Error (t->get_locus (), "unexpected token %qs - expecting delimiters " "(for a declarative macro definiton)", t->get_token_description ())); return nullptr; } } /* Parses a visibility syntactical production (i.e. 
creating a non-default
 * visibility) */

/* Result by input shape:
 *   - no `pub`                     -> private visibility, nothing consumed
 *   - `pub` not followed by `(`,
 *     or followed by `()`          -> plain public visibility (only `pub`
 *                                     is consumed, so a following unit type
 *                                     is left for the caller)
 *   - `pub(crate|self|super)`      -> the corresponding restricted visibility
 *   - `pub(in path)`               -> in-path visibility; a missing path is
 *                                     reported and yields an error value
 *   - anything else after `pub(`   -> error recorded, token skipped,
 *                                     "malformed" error value
 * A missing closing `)` does not change the returned visibility: the result
 * of skip_token (RIGHT_PAREN) is not checked here. */
template <typename ManagedTokenSource>
tl::expected<AST::Visibility, Parse::Error::Visibility>
Parser<ManagedTokenSource>::parse_visibility ()
{
  // check for no visibility
  if (lexer.peek_token ()->get_id () != PUB)
    {
      return AST::Visibility::create_private ();
    }

  auto vis_loc = lexer.peek_token ()->get_locus ();
  lexer.skip_token ();

  // create simple pub visibility if
  // - found no parentheses
  // - found unit type `()`
  if (lexer.peek_token ()->get_id () != LEFT_PAREN
      || lexer.peek_token (1)->get_id () == RIGHT_PAREN)
    {
      return AST::Visibility::create_public (vis_loc);
      // or whatever
    }

  // consume the '('
  lexer.skip_token ();

  const_TokenPtr t = lexer.peek_token ();
  auto path_loc = t->get_locus ();

  switch (t->get_id ())
    {
    case CRATE:
      lexer.skip_token ();

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_crate (path_loc, vis_loc);
    case SELF:
      lexer.skip_token ();

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_self (path_loc, vis_loc);
    case SUPER:
      lexer.skip_token ();

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_super (path_loc, vis_loc);
    case IN:
      {
        lexer.skip_token ();

        // parse the "in" path as well
        auto path = parse_simple_path ();
        if (!path)
          {
            Error error (lexer.peek_token ()->get_locus (),
                         "missing path in pub(in path) visibility");
            add_error (std::move (error));

            // skip after somewhere?
            return Parse::Error::Visibility::make_missing_path ();
          }

        skip_token (RIGHT_PAREN);

        return AST::Visibility::create_in_path (std::move (path.value ()),
                                                vis_loc);
      }
    default:
      add_error (Error (t->get_locus (), "unexpected token %qs in visibility",
                        t->get_token_description ()));

      lexer.skip_token ();
      return Parse::Error::Visibility::make_malformed ();
    }
}

// Parses a module - either a bodied module or a module defined in another file.
template std::unique_ptr Parser::parse_module (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); Unsafety safety = Unsafety::Normal; if (lexer.peek_token ()->get_id () == UNSAFE) { safety = Unsafety::Unsafe; skip_token (UNSAFE); } skip_token (MOD); const_TokenPtr module_name = expect_token (IDENTIFIER); if (module_name == nullptr) { return nullptr; } Identifier name{module_name}; const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case SEMICOLON: lexer.skip_token (); // Construct an external module return std::unique_ptr ( new AST::Module (std::move (name), std::move (vis), std::move (outer_attrs), locus, safety, lexer.get_filename (), inline_module_stack)); case LEFT_CURLY: { lexer.skip_token (); // parse inner attributes AST::AttrVec inner_attrs = parse_inner_attributes (); std::string default_path = name.as_string (); if (inline_module_stack.empty ()) { std::string filename = lexer.get_filename (); auto slash_idx = filename.rfind (file_separator); if (slash_idx == std::string::npos) slash_idx = 0; else slash_idx++; filename = filename.substr (slash_idx); std::string subdir; if (get_file_subdir (filename, subdir)) default_path = subdir + file_separator + name.as_string (); } std::string module_path_name = extract_module_path (inner_attrs, outer_attrs, default_path); InlineModuleStackScope scope (*this, std::move (module_path_name)); // parse items std::vector> items; const_TokenPtr tok = lexer.peek_token (); while (tok->get_id () != RIGHT_CURLY) { auto item = parse_item (false); if (!item) { Error error (tok->get_locus (), "failed to parse item in module"); add_error (std::move (error)); return nullptr; } items.push_back (std::move (item.value ())); tok = lexer.peek_token (); } if (!skip_token (RIGHT_CURLY)) { // skip somewhere? 
return nullptr; } return std::unique_ptr ( new AST::Module (std::move (name), locus, std::move (items), std::move (vis), safety, std::move (inner_attrs), std::move (outer_attrs))); // module name? } default: add_error ( Error (t->get_locus (), "unexpected token %qs in module declaration/definition item", t->get_token_description ())); lexer.skip_token (); return nullptr; } } // Parses an extern crate declaration (dependency on external crate) template std::unique_ptr Parser::parse_extern_crate (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); if (!skip_token (EXTERN_KW)) { skip_after_semicolon (); return nullptr; } if (!skip_token (CRATE)) { skip_after_semicolon (); return nullptr; } /* parse crate reference name - this has its own syntactical rule in reference * but seems to not be used elsewhere, so i'm putting it here */ const_TokenPtr crate_name_tok = lexer.peek_token (); std::string crate_name; switch (crate_name_tok->get_id ()) { case IDENTIFIER: crate_name = crate_name_tok->get_str (); lexer.skip_token (); break; case SELF: crate_name = Values::Keywords::SELF; lexer.skip_token (); break; default: add_error ( Error (crate_name_tok->get_locus (), "expecting crate name (identifier or %), found %qs", crate_name_tok->get_token_description ())); skip_after_semicolon (); return nullptr; } // don't parse as clause if it doesn't exist if (lexer.peek_token ()->get_id () == SEMICOLON) { lexer.skip_token (); return std::unique_ptr ( new AST::ExternCrate (std::move (crate_name), std::move (vis), std::move (outer_attrs), locus)); } /* parse as clause - this also has its own syntactical rule in reference and * also seems to not be used elsewhere, so including here again. 
*/ if (!skip_token (AS)) { skip_after_semicolon (); return nullptr; } const_TokenPtr as_name_tok = lexer.peek_token (); std::string as_name; switch (as_name_tok->get_id ()) { case IDENTIFIER: as_name = as_name_tok->get_str (); lexer.skip_token (); break; case UNDERSCORE: as_name = Values::Keywords::UNDERSCORE; lexer.skip_token (); break; default: add_error ( Error (as_name_tok->get_locus (), "expecting as clause name (identifier or %<_%>), found %qs", as_name_tok->get_token_description ())); skip_after_semicolon (); return nullptr; } if (!skip_token (SEMICOLON)) { skip_after_semicolon (); return nullptr; } return std::unique_ptr ( new AST::ExternCrate (std::move (crate_name), std::move (vis), std::move (outer_attrs), locus, std::move (as_name))); } // Parses a use declaration. template std::unique_ptr Parser::parse_use_decl (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); if (!skip_token (USE)) { skip_after_semicolon (); return nullptr; } // parse use tree, which is required std::unique_ptr use_tree = parse_use_tree (); if (use_tree == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse use tree in use declaration"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } if (!skip_token (SEMICOLON)) { skip_after_semicolon (); return nullptr; } return std::unique_ptr ( new AST::UseDeclaration (std::move (use_tree), std::move (vis), std::move (outer_attrs), locus)); } // Parses a use tree (which can be recursive and is actually a base class). 
template std::unique_ptr Parser::parse_use_tree () { /* potential syntax definitions in attempt to get algorithm: * Glob: * <- SimplePath :: * * <- :: * * <- * * Nested tree thing: * <- SimplePath :: { COMPLICATED_INNER_TREE_THING } * <- :: COMPLICATED_INNER_TREE_THING } * <- { COMPLICATED_INNER_TREE_THING } * Rebind thing: * <- SimplePath as IDENTIFIER * <- SimplePath as _ * <- SimplePath */ /* current plan of attack: try to parse SimplePath first - if fails, one of * top two then try parse :: - if fails, one of top two. Next is deciding * character for top two. */ /* Thus, parsing smaller parts of use tree may require feeding into function * via parameters (or could handle all in this single function because other * use tree types aren't recognised as separate in the spec) */ // TODO: I think this function is too complex, probably should split it location_t locus = lexer.peek_token ()->get_locus (); // bool has_path = false; auto path = parse_simple_path (); if (!path) { // has no path, so must be glob or nested tree UseTree type bool is_global = false; // check for global scope resolution operator if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) { lexer.skip_token (); is_global = true; } const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case ASTERISK: // glob UseTree type lexer.skip_token (); if (is_global) return std::unique_ptr ( new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL, AST::SimplePath::create_empty (), locus)); else return std::unique_ptr ( new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH, AST::SimplePath::create_empty (), locus)); case LEFT_CURLY: { // nested tree UseTree type lexer.skip_token (); std::vector> use_trees; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr use_tree = parse_use_tree (); if (use_tree == nullptr) { break; } use_trees.push_back (std::move (use_tree)); if (lexer.peek_token ()->get_id () != COMMA) break; lexer.skip_token (); t = lexer.peek_token (); } // skip end 
curly delimiter if (!skip_token (RIGHT_CURLY)) { // skip after somewhere? return nullptr; } if (is_global) return std::unique_ptr ( new AST::UseTreeList (AST::UseTreeList::GLOBAL, AST::SimplePath::create_empty (), std::move (use_trees), locus)); else return std::unique_ptr ( new AST::UseTreeList (AST::UseTreeList::NO_PATH, AST::SimplePath::create_empty (), std::move (use_trees), locus)); } case AS: // this is not allowed add_error (Error ( t->get_locus (), "use declaration with rebind % requires a valid simple path - " "none found")); skip_after_semicolon (); return nullptr; default: add_error (Error (t->get_locus (), "unexpected token %qs in use tree with " "no valid simple path (i.e. list" " or glob use tree)", t->get_token_description ())); skip_after_semicolon (); return nullptr; } } else { const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case AS: { // rebind UseTree type lexer.skip_token (); const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: // skip lexer token lexer.skip_token (); return std::unique_ptr ( new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER, std::move (path.value ()), locus, t)); case UNDERSCORE: // skip lexer token lexer.skip_token (); return std::unique_ptr ( new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD, std::move (path.value ()), locus, {Values::Keywords::UNDERSCORE, t->get_locus ()})); default: add_error (Error ( t->get_locus (), "unexpected token %qs in use tree with as clause - expected " "identifier or %<_%>", t->get_token_description ())); skip_after_semicolon (); return nullptr; } } case SEMICOLON: // rebind UseTree type without rebinding - path only // don't skip semicolon - handled in parse_use_tree // lexer.skip_token(); case COMMA: case RIGHT_CURLY: // this may occur in recursive calls - assume it is ok and ignore it return std::unique_ptr ( new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path.value ()), locus)); case SCOPE_RESOLUTION: // keep going break; 
default: add_error (Error (t->get_locus (), "unexpected token %qs in use tree with valid path", t->get_token_description ())); return nullptr; } skip_token (); t = lexer.peek_token (); switch (t->get_id ()) { case ASTERISK: // glob UseTree type lexer.skip_token (); return std::unique_ptr ( new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED, std::move (path.value ()), locus)); case LEFT_CURLY: { // nested tree UseTree type lexer.skip_token (); std::vector> use_trees; // TODO: think of better control structure const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr use_tree = parse_use_tree (); if (use_tree == nullptr) { break; } use_trees.push_back (std::move (use_tree)); if (lexer.peek_token ()->get_id () != COMMA) break; lexer.skip_token (); t = lexer.peek_token (); } // skip end curly delimiter if (!skip_token (RIGHT_CURLY)) { // skip after somewhere? return nullptr; } return std::unique_ptr ( new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED, std::move (path.value ()), std::move (use_trees), locus)); } default: add_error (Error (t->get_locus (), "unexpected token %qs in use tree with valid path", t->get_token_description ())); // skip_after_semicolon(); return nullptr; } } } // Parses a function (not a method). 
template std::unique_ptr Parser::parse_function (AST::Visibility vis, AST::AttrVec outer_attrs, bool is_external) { location_t locus = lexer.peek_token ()->get_locus (); // Get qualifiers for function if they exist auto qualifiers = parse_function_qualifiers (); if (!qualifiers) return nullptr; skip_token (FN_KW); // Save function name token const_TokenPtr function_name_tok = expect_token (IDENTIFIER); if (function_name_tok == nullptr) { skip_after_next_block (); return nullptr; } Identifier function_name{function_name_tok}; // parse generic params - if exist std::vector> generic_params = parse_generic_params_in_angles (); if (!skip_token (LEFT_PAREN)) { Error error (lexer.peek_token ()->get_locus (), "function declaration missing opening parentheses before " "parameter list"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } auto initial_param = parse_self_param (); if (!initial_param.has_value () && initial_param.error ().kind != Parse::Error::Self::Kind::NOT_SELF) return nullptr; if (initial_param.has_value () && lexer.peek_token ()->get_id () == COMMA) skip_token (); // parse function parameters (only if next token isn't right paren) std::vector> function_params; if (lexer.peek_token ()->get_id () != RIGHT_PAREN) function_params = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); if (initial_param.has_value ()) function_params.insert (function_params.begin (), std::move (*initial_param)); if (!skip_token (RIGHT_PAREN)) { Error error (lexer.peek_token ()->get_locus (), "function declaration missing closing parentheses after " "parameter list"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse function return type - if exists std::unique_ptr return_type = parse_function_return_type (); // parse where clause - if exists AST::WhereClause where_clause = parse_where_clause (); tl::optional> body = tl::nullopt; if (lexer.peek_token ()->get_id () == SEMICOLON) lexer.skip_token (); else { 
auto block_expr = parse_block_expr (); if (!block_expr) return nullptr; body = std::move (block_expr.value ()); } return std::unique_ptr (new AST::Function ( std::move (function_name), std::move (qualifiers.value ()), std::move (generic_params), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus, is_external)); } // Parses function or method qualifiers (i.e. const, unsafe, and extern). template tl::expected Parser::parse_function_qualifiers () { location_t locus = lexer.peek_token ()->get_locus (); auto parsed = parse_function_qualifiers_raw (locus); if (!parsed) return tl::unexpected (parsed.error ()); return function_qualifiers_from_keywords (locus, std::move (parsed->first), std::move (parsed->second)); } // Take the list of parsed function qualifiers and convert it to // the corrresponding flags to pass to the AST item constructor. // // This assumes ``keywords`` contains only those tokens that // map to qualifiers. template tl::expected Parser::function_qualifiers_from_keywords ( location_t locus, const std::vector keywords, std::string abi) { Default default_status = Default::No; Async async_status = Async::No; Const const_status = Const::No; Unsafety unsafe_status = Unsafety::Normal; bool has_extern = false; for (auto qualifier : keywords) { switch (qualifier) { case IDENTIFIER: // only "default" is valid in this context default_status = Default::Yes; continue; case CONST: const_status = Const::Yes; continue; case ASYNC: async_status = Async::Yes; continue; case UNSAFE: unsafe_status = Unsafety::Unsafe; continue; case EXTERN_KW: has_extern = true; continue; default: // non-qualifier token in input rust_unreachable (); } } return AST::FunctionQualifiers (locus, default_status, async_status, const_status, unsafe_status, has_extern, std::move (abi)); } // this consumes as many function qualifier tokens while ensuring // uniqueness. 
template tl::expected, std::string>, Parse::Error::Node> Parser::parse_function_qualifiers_raw (location_t locus) { std::vector found_order; std::string abi; // this will terminate on duplicates or the first non-qualifier token while (true) { auto token = lexer.peek_token (); const TokenId token_id = token->get_id (); location_t locus = lexer.peek_token ()->get_locus (); switch (token_id) { case IDENTIFIER: if (token->get_str () != Values::WeakKeywords::DEFAULT) { // only "default" is valid in this context, so this must // be a non-qualifier keyword goto done; } // fallthrough case CONST: case ASYNC: case UNSAFE: found_order.push_back (token_id); lexer.skip_token (); break; case EXTERN_KW: { found_order.push_back (token_id); lexer.skip_token (); // detect optional abi name const_TokenPtr next_tok = lexer.peek_token (); if (next_tok->get_id () == STRING_LITERAL) { abi = next_tok->get_str (); lexer.skip_token (); } } break; default: // non-qualifier keyword goto done; } if (std::count (found_order.cbegin (), found_order.cend (), token_id) > 1) { // qualifiers mustn't appear twice Error error (locus, "encountered duplicate function qualifier %qs", token->get_token_description ()); add_error (std::move (error)); return tl::unexpected ( Parse::Error::Node::MALFORMED); } } done: if (!ensure_function_qualifier_order (locus, found_order)) return tl::unexpected (Parse::Error::Node::MALFORMED); return make_pair (found_order, abi); } // Validate the order of the list of function qualifiers; this assumes that // ``found_order`` consists only of function qualifier tokens. // // If the order is illegal, the generated error message gives both the wrong // order as found in the source and the correct order according to Rust syntax // rules. 
template bool Parser::ensure_function_qualifier_order ( location_t locus, const std::vector &found_order) { // Check in order of default, const, async, unsafe, extern auto token_priority = [] (const TokenId id) { switch (id) { case IDENTIFIER: // "default"; the only "weak" keyword considered here return 1; case CONST: return 2; case ASYNC: return 3; case UNSAFE: return 4; case EXTERN_KW: return 5; default: rust_unreachable (); }; }; size_t last_priority = 0; for (auto token_id : found_order) { const size_t priority = token_priority (token_id); if (priority <= last_priority) { emit_function_qualifier_order_error_msg (locus, found_order); return false; } last_priority = priority; } return true; } template void Parser::emit_function_qualifier_order_error_msg ( location_t locus, const std::vector &found_order) { std::vector expected_order = {IDENTIFIER, CONST, ASYNC, UNSAFE, EXTERN_KW}; // we only keep the qualifiers actually used in the offending code std::vector::iterator token_id = expected_order.begin (); while (token_id != expected_order.end ()) { if (std::find (found_order.cbegin (), found_order.cend (), *token_id) == found_order.cend ()) { token_id = expected_order.erase (token_id); } else { ++token_id; } } auto qualifiers_to_str = [] (const std::vector &token_ids) { std::ostringstream ss; for (auto id : token_ids) { if (ss.tellp () != 0) ss << ' '; if (id == IDENTIFIER) ss << Values::WeakKeywords::DEFAULT; else ss << token_id_keyword_string (id); } return ss.str (); }; const std::string found_qualifiers = qualifiers_to_str (found_order); const std::string expected_qualifiers = qualifiers_to_str (expected_order); location_t error_locus = make_location (locus, locus, lexer.peek_token ()->get_locus ()); Error error (error_locus, "invalid order of function qualifiers; found %qs, expected %qs", found_qualifiers.c_str (), expected_qualifiers.c_str ()); add_error (std::move (error)); } // Parses generic (lifetime or type) params inside angle brackets (optional). 
template std::vector> Parser::parse_generic_params_in_angles () { if (lexer.peek_token ()->get_id () != LEFT_ANGLE) { // seems to be no generic params, so exit with empty vector return std::vector> (); } lexer.skip_token (); // DEBUG: rust_debug ("skipped left angle in generic param"); std::vector> generic_params = parse_generic_params (Parse::Utils::is_right_angle_tok); // DEBUG: rust_debug ("finished parsing actual generic params (i.e. inside angles)"); if (!skip_generics_right_angle ()) { // DEBUG rust_debug ("failed to skip generics right angle - returning empty " "generic params"); return std::vector> (); } return generic_params; } template template std::unique_ptr Parser::parse_generic_param (EndTokenPred is_end_token) { auto outer_attrs = parse_outer_attributes (); std::unique_ptr param; auto token = lexer.peek_token (); switch (token->get_id ()) { case LIFETIME: { auto lifetime = parse_lifetime (false); if (!lifetime) { Error error (token->get_locus (), "failed to parse lifetime in generic parameter list"); add_error (std::move (error)); return nullptr; } std::vector lifetime_bounds; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse required bounds lifetime_bounds = parse_lifetime_bounds ([is_end_token] (TokenId id) { return is_end_token (id) || id == COMMA; }); } param = std::unique_ptr (new AST::LifetimeParam ( std::move (lifetime.value ()), std::move (lifetime_bounds), std::move (outer_attrs), token->get_locus ())); break; } case IDENTIFIER: { auto type_ident = token->get_str (); lexer.skip_token (); std::vector> type_param_bounds; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse optional type param bounds type_param_bounds = parse_type_param_bounds (); } std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); // parse required type type = parse_type (); if (!type) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse type in type param in 
generic params"); add_error (std::move (error)); return nullptr; } } param = std::unique_ptr ( new AST::TypeParam (std::move (type_ident), token->get_locus (), std::move (type_param_bounds), std::move (type), std::move (outer_attrs))); break; } case CONST: { lexer.skip_token (); auto name_token = expect_token (IDENTIFIER); if (!name_token || !expect_token (COLON)) return nullptr; auto type = parse_type (); if (!type) return nullptr; // optional default value tl::optional default_expr = tl::nullopt; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); auto tok = lexer.peek_token (); default_expr = parse_generic_arg (); if (!default_expr) { Error error (tok->get_locus (), "invalid token for start of default value for " "const generic parameter: expected %, " "% or %, got %qs", token_id_to_str (tok->get_id ())); add_error (std::move (error)); return nullptr; } // At this point, we *know* that we are parsing a const // expression if (default_expr.value ().get_kind () == AST::GenericArg::Kind::Either) default_expr = default_expr.value ().disambiguate_to_const (); } param = std::unique_ptr ( new AST::ConstGenericParam (name_token->get_str (), std::move (type), default_expr, std::move (outer_attrs), token->get_locus ())); break; } default: // FIXME: Can we clean this last call with a method call? Error error (token->get_locus (), "unexpected token when parsing generic parameters: %qs", token->as_string ().c_str ()); add_error (std::move (error)); return nullptr; } return param; } /* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost * always parse_generic_params_in_angles is what is wanted. 
*/ template template std::vector> Parser::parse_generic_params (EndTokenPred is_end_token) { std::vector> generic_params; /* can't parse lifetime and type params separately due to lookahead issues * thus, parse them all here */ /* HACK: used to retain attribute data if a lifetime param is tentatively * parsed but it turns out to be type param */ AST::Attribute parsed_outer_attr = AST::Attribute::create_empty (); // Did we parse a generic type param yet auto type_seen = false; // Did we parse a const param with a default value yet auto const_with_default_seen = false; // Did the user write a lifetime parameter after a type one auto order_error = false; // Did the user write a const param with a default value after a type one auto const_with_default_order_error = false; // parse lifetime params while (!is_end_token (lexer.peek_token ()->get_id ())) { auto param = parse_generic_param (is_end_token); if (param) { if (param->get_kind () == AST::GenericParam::Kind::Type) { type_seen = true; if (const_with_default_seen) const_with_default_order_error = true; } else if (param->get_kind () == AST::GenericParam::Kind::Lifetime && type_seen) { order_error = true; if (const_with_default_seen) const_with_default_order_error = true; } else if (param->get_kind () == AST::GenericParam::Kind::Const) { type_seen = true; AST::ConstGenericParam *const_param = static_cast (param.get ()); if (const_param->has_default_value ()) const_with_default_seen = true; else if (const_with_default_seen) const_with_default_order_error = true; } generic_params.emplace_back (std::move (param)); maybe_skip_token (COMMA); } else break; } // FIXME: Add reordering hint if (order_error) { Error error (generic_params.front ()->get_locus (), "invalid order for generic parameters: lifetime parameters " "must be declared prior to type and const parameters"); add_error (std::move (error)); } if (const_with_default_order_error) { Error error (generic_params.front ()->get_locus (), "invalid order for generic 
parameters: generic parameters " "with a default must be trailing"); add_error (std::move (error)); } generic_params.shrink_to_fit (); return generic_params; } /* Parses lifetime generic parameters (pointers). Will also consume any * trailing comma. No extra checks for end token. */ template std::vector> Parser::parse_lifetime_params () { std::vector> lifetime_params; while (lexer.peek_token ()->get_id () != END_OF_FILE) { auto lifetime_param = parse_lifetime_param (); if (!lifetime_param) { // can't treat as error as only way to get out with trailing comma break; } lifetime_params.emplace_back ( new AST::LifetimeParam (std::move (lifetime_param.value ()))); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses lifetime generic parameters (pointers). Will also consume any * trailing comma. Has extra is_end_token predicate checking. */ template template std::vector> Parser::parse_lifetime_params (EndTokenPred is_end_token) { std::vector> lifetime_params; // if end_token is not specified, it defaults to EOF, so should work fine while (!is_end_token (lexer.peek_token ()->get_id ())) { auto lifetime_param = parse_lifetime_param (); if (!lifetime_param) { /* TODO: is it worth throwing away all lifetime params just because * one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime param in lifetime params"); add_error (std::move (error)); return {}; } lifetime_params.emplace_back ( new AST::LifetimeParam (std::move (lifetime_param))); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses lifetime generic parameters (objects). Will also consume any * trailing comma. No extra checks for end token. * TODO: is this best solution? implements most of the same algorithm. 
* TODO: seems to be unused, remove? */ template std::vector Parser::parse_lifetime_params_objs () { std::vector lifetime_params; // bad control structure as end token cannot be guaranteed while (true) { auto lifetime_param = parse_lifetime_param (); if (!lifetime_param) { // not an error as only way to exit if trailing comma break; } lifetime_params.push_back (std::move (lifetime_param.value ())); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses lifetime generic parameters (objects). Will also consume any * trailing comma. Has extra is_end_token predicate checking. * TODO: is this best solution? implements most of the same algorithm. */ template template std::vector Parser::parse_lifetime_params_objs ( EndTokenPred is_end_token) { std::vector lifetime_params; while (!is_end_token (lexer.peek_token ()->get_id ())) { auto lifetime_param = parse_lifetime_param (); if (!lifetime_param) { /* TODO: is it worth throwing away all lifetime params just because * one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime param in lifetime params"); add_error (std::move (error)); return {}; } lifetime_params.push_back (std::move (lifetime_param.value ())); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses a sequence of a certain grammar rule in object form (not pointer or * smart pointer), delimited by commas and ending when 'is_end_token' is * satisfied (templated). Will also consume any trailing comma. * FIXME: this cannot be used due to member function pointer problems (i.e. 
* parsing_function cannot be specified properly) */ template template auto Parser::parse_non_ptr_sequence ( ParseFunction parsing_function, EndTokenPred is_end_token, std::string error_msg) -> std::vector { std::vector params; while (!is_end_token (lexer.peek_token ()->get_id ())) { auto param = parsing_function (); if (param.is_error ()) { // TODO: is it worth throwing away all params just because one // failed? Error error (lexer.peek_token ()->get_locus (), std::move (error_msg)); add_error (std::move (error)); return {}; } params.push_back (std::move (param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } params.shrink_to_fit (); return params; } /* Parses a single lifetime generic parameter (not including comma). */ template tl::expected Parser::parse_lifetime_param () { // parse outer attributes, which are optional and may not exist auto outer_attrs = parse_outer_attributes (); // save lifetime token - required const_TokenPtr lifetime_tok = lexer.peek_token (); if (lifetime_tok->get_id () != LIFETIME) { // if lifetime is missing, must not be a lifetime param, so return error return Parse::Error::LifetimeParam::make_not_a_lifetime_param (); } lexer.skip_token (); AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (), lifetime_tok->get_locus ()); // parse lifetime bounds, if it exists std::vector lifetime_bounds; if (lexer.peek_token ()->get_id () == COLON) { // parse lifetime bounds lifetime_bounds = parse_lifetime_bounds (); // TODO: have end token passed in? } return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds), std::move (outer_attrs), lifetime_tok->get_locus ()); } // Parses type generic parameters. Will also consume any trailing comma. 
template std::vector> Parser::parse_type_params () { std::vector> type_params; // infinite loop with break on failure as no info on ending token while (true) { std::unique_ptr type_param = parse_type_param (); if (type_param == nullptr) { // break if fails to parse break; } type_params.push_back (std::move (type_param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } type_params.shrink_to_fit (); return type_params; } // Parses type generic parameters. Will also consume any trailing comma. template template std::vector> Parser::parse_type_params (EndTokenPred is_end_token) { std::vector> type_params; while (!is_end_token (lexer.peek_token ()->get_id ())) { std::unique_ptr type_param = parse_type_param (); if (type_param == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type param in type params"); add_error (std::move (error)); return {}; } type_params.push_back (std::move (type_param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } type_params.shrink_to_fit (); return type_params; /* TODO: this shares most code with parse_lifetime_params - good place to * use template (i.e. parse_non_ptr_sequence if doable) */ } /* Parses a single type (generic) parameter, not including commas. May change * to return value. 
*/ template std::unique_ptr Parser::parse_type_param () { // parse outer attributes, which are optional and may not exist auto outer_attrs = parse_outer_attributes (); const_TokenPtr identifier_tok = lexer.peek_token (); if (identifier_tok->get_id () != IDENTIFIER) { // return null as type param can't exist without this required // identifier return nullptr; } Identifier ident{identifier_tok}; lexer.skip_token (); // parse type param bounds (if they exist) std::vector> type_param_bounds; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse type param bounds, which may or may not exist type_param_bounds = parse_type_param_bounds (); } // parse type (if it exists) std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); // parse type (now required) type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in type param"); add_error (std::move (error)); return nullptr; } } return std::unique_ptr ( new AST::TypeParam (std::move (ident), identifier_tok->get_locus (), std::move (type_param_bounds), std::move (type), std::move (outer_attrs))); } /* Parses regular (i.e. non-generic) parameters in functions or methods. Also * has end token handling. */ template template std::vector> Parser::parse_function_params (EndTokenPred is_end_token) { std::vector> params; if (is_end_token (lexer.peek_token ()->get_id ())) return params; auto initial_param = parse_function_param (); // Return empty parameter list if no parameter there if (initial_param == nullptr) { // TODO: is this an error? return params; } params.push_back (std::move (initial_param)); // maybe think of a better control structure here - do-while with an initial // error state? 
basically, loop through parameter list until can't find any // more params const_TokenPtr t = lexer.peek_token (); while (t->get_id () == COMMA) { // skip comma if applies lexer.skip_token (); // TODO: strictly speaking, shouldn't there be no trailing comma? if (is_end_token (lexer.peek_token ()->get_id ())) break; // now, as right paren would break, function param is required auto param = parse_function_param (); if (param == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse function param (in function params)"); add_error (std::move (error)); // skip somewhere? return std::vector> (); } params.push_back (std::move (param)); t = lexer.peek_token (); } params.shrink_to_fit (); return params; } /* Parses a single regular (i.e. non-generic) parameter in a function or * method, i.e. the "name: type" bit. Also handles it not existing. */ template std::unique_ptr Parser::parse_function_param () { // parse outer attributes if they exist AST::AttrVec outer_attrs = parse_outer_attributes (); // TODO: should saved location be at start of outer attributes or pattern? 
location_t locus = lexer.peek_token ()->get_locus (); if (lexer.peek_token ()->get_id () == ELLIPSIS) // Unnamed variadic { lexer.skip_token (); // Skip ellipsis return std::make_unique ( AST::VariadicParam (std::move (outer_attrs), locus)); } std::unique_ptr param_pattern = parse_pattern (); // create error function param if it doesn't exist if (param_pattern == nullptr) { // skip after something return nullptr; } if (!skip_token (COLON)) { // skip after something return nullptr; } if (lexer.peek_token ()->get_id () == ELLIPSIS) // Named variadic { lexer.skip_token (); // Skip ellipsis return std::make_unique ( AST::VariadicParam (std::move (param_pattern), std::move (outer_attrs), locus)); } else { std::unique_ptr param_type = parse_type (); if (param_type == nullptr) { return nullptr; } return std::make_unique ( AST::FunctionParam (std::move (param_pattern), std::move (param_type), std::move (outer_attrs), locus)); } } /* Parses a function or method return type syntactical construction. Also * handles a function return type not existing. */ template std::unique_ptr Parser::parse_function_return_type () { if (lexer.peek_token ()->get_id () != RETURN_TYPE) return nullptr; // skip return type, as it now obviously exists lexer.skip_token (); std::unique_ptr type = parse_type (); return type; } /* Parses a "where clause" (in a function, struct, method, etc.). Also handles * a where clause not existing, in which it will return * WhereClause::create_empty(), which can be checked via * WhereClause::is_empty(). 
*/ template AST::WhereClause Parser::parse_where_clause () { const_TokenPtr where_tok = lexer.peek_token (); if (where_tok->get_id () != WHERE) { // where clause doesn't exist, so create empty one return AST::WhereClause::create_empty (); } lexer.skip_token (); /* parse where clause items - this is not a separate rule in the reference * so won't be here */ std::vector> where_clause_items; std::vector for_lifetimes; if (lexer.peek_token ()->get_id () == FOR) for_lifetimes = parse_for_lifetimes (); /* HACK: where clauses end with a right curly or semicolon or equals in all * uses currently */ const_TokenPtr t = lexer.peek_token (); while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON && t->get_id () != EQUAL) { std::unique_ptr where_clause_item = parse_where_clause_item (for_lifetimes); if (where_clause_item == nullptr) { Error error (t->get_locus (), "failed to parse where clause item"); add_error (std::move (error)); return AST::WhereClause::create_empty (); } where_clause_items.push_back (std::move (where_clause_item)); // also skip comma if it exists if (lexer.peek_token ()->get_id () != COMMA) break; lexer.skip_token (); t = lexer.peek_token (); } where_clause_items.shrink_to_fit (); return AST::WhereClause (std::move (where_clause_items)); } /* Parses a where clause item (lifetime or type bound). Does not parse any * commas. */ template std::unique_ptr Parser::parse_where_clause_item ( const std::vector &outer_for_lifetimes) { // shitty cheat way of determining lifetime or type bound - test for // lifetime const_TokenPtr t = lexer.peek_token (); if (t->get_id () == LIFETIME) return parse_lifetime_where_clause_item (); else return parse_type_bound_where_clause_item (outer_for_lifetimes); } // Parses a lifetime where clause item. template std::unique_ptr Parser::parse_lifetime_where_clause_item () { auto parsed_lifetime = parse_lifetime (false); if (!parsed_lifetime) { // TODO: error here? 
return nullptr; } auto lifetime = parsed_lifetime.value (); if (!skip_token (COLON)) { // TODO: skip after somewhere return nullptr; } std::vector lifetime_bounds = parse_lifetime_bounds (); // TODO: have end token passed in? location_t locus = lifetime.get_locus (); return std::unique_ptr ( new AST::LifetimeWhereClauseItem (std::move (lifetime), std::move (lifetime_bounds), locus)); } // Parses a type bound where clause item. template std::unique_ptr Parser::parse_type_bound_where_clause_item ( const std::vector &outer_for_lifetimes) { std::vector for_lifetimes = outer_for_lifetimes; std::unique_ptr type = parse_type (); if (type == nullptr) { return nullptr; } if (!skip_token (COLON)) { // TODO: skip after somewhere return nullptr; } if (lexer.peek_token ()->get_id () == FOR) { auto for_lifetimes_inner = parse_for_lifetimes (); for_lifetimes.insert (for_lifetimes.end (), for_lifetimes_inner.begin (), for_lifetimes_inner.end ()); } // parse type param bounds if they exist std::vector> type_param_bounds = parse_type_param_bounds (); location_t locus = lexer.peek_token ()->get_locus (); return std::unique_ptr ( new AST::TypeBoundWhereClauseItem (std::move (for_lifetimes), std::move (type), std::move (type_param_bounds), locus)); } // Parses a for lifetimes clause, including the for keyword and angle // brackets. template std::vector Parser::parse_for_lifetimes () { std::vector params; if (!skip_token (FOR)) { // skip after somewhere? return params; } if (!skip_token (LEFT_ANGLE)) { // skip after somewhere? return params; } /* cannot specify end token due to parsing problems with '>' tokens being * nested */ params = parse_lifetime_params_objs (Parse::Utils::is_right_angle_tok); if (!skip_generics_right_angle ()) { // DEBUG rust_debug ("failed to skip generics right angle after (supposedly) " "finished parsing where clause items"); // ok, well this gets called. // skip after somewhere? 
return params; } return params; } // Parses type parameter bounds in where clause or generic arguments. template std::vector> Parser::parse_type_param_bounds () { std::vector> type_param_bounds; std::unique_ptr initial_bound = parse_type_param_bound (); // quick exit if null if (initial_bound == nullptr) { /* error? type param bounds must have at least one term, but are bounds * optional? */ return type_param_bounds; } type_param_bounds.push_back (std::move (initial_bound)); while (lexer.peek_token ()->get_id () == PLUS) { lexer.skip_token (); std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { /* not an error: bound is allowed to be null as trailing plus is * allowed */ return type_param_bounds; } type_param_bounds.push_back (std::move (bound)); } type_param_bounds.shrink_to_fit (); return type_param_bounds; } /* Parses type parameter bounds in where clause or generic arguments, with end * token handling. */ template template std::vector> Parser::parse_type_param_bounds (EndTokenPred is_end_token) { std::vector> type_param_bounds; std::unique_ptr initial_bound = parse_type_param_bound (); // quick exit if null if (initial_bound == nullptr) { /* error? type param bounds must have at least one term, but are bounds * optional? */ return type_param_bounds; } type_param_bounds.push_back (std::move (initial_bound)); while (lexer.peek_token ()->get_id () == PLUS) { lexer.skip_token (); // break if end token character if (is_end_token (lexer.peek_token ()->get_id ())) break; std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { // TODO how wise is it to ditch all bounds if only one failed? Error error (lexer.peek_token ()->get_locus (), "failed to parse type param bound in type param bounds"); add_error (std::move (error)); return {}; } type_param_bounds.push_back (std::move (bound)); } type_param_bounds.shrink_to_fit (); return type_param_bounds; } /* Parses a single type parameter bound in a where clause or generic argument. 
* Does not parse the '+' between arguments. */ template std::unique_ptr Parser::parse_type_param_bound () { // shitty cheat way of determining lifetime or trait bound - test for // lifetime const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LIFETIME: return std::unique_ptr ( new AST::Lifetime (parse_lifetime (false).value ())); case LEFT_PAREN: case QUESTION_MARK: case FOR: case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: case DOLLAR_SIGN: case SCOPE_RESOLUTION: return parse_trait_bound (); default: // don't error - assume this is fine TODO return nullptr; } } // Parses a trait bound type param bound. template std::unique_ptr Parser::parse_trait_bound () { bool has_parens = false; bool has_question_mark = false; location_t locus = lexer.peek_token ()->get_locus (); /* parse optional `for lifetimes`. */ std::vector for_lifetimes; if (lexer.peek_token ()->get_id () == FOR) for_lifetimes = parse_for_lifetimes (); // handle trait bound being in parentheses if (lexer.peek_token ()->get_id () == LEFT_PAREN) { has_parens = true; lexer.skip_token (); } // handle having question mark (optional) if (lexer.peek_token ()->get_id () == QUESTION_MARK) { has_question_mark = true; lexer.skip_token (); } // handle TypePath AST::TypePath type_path = parse_type_path (); if (type_path.is_error ()) return nullptr; // handle closing parentheses if (has_parens) { if (!skip_token (RIGHT_PAREN)) { return nullptr; } } return std::unique_ptr ( new AST::TraitBound (std::move (type_path), locus, has_parens, has_question_mark, std::move (for_lifetimes))); } // Parses lifetime bounds. 
template std::vector Parser::parse_lifetime_bounds () { std::vector lifetime_bounds; while (true) { auto lifetime = parse_lifetime (false); // quick exit for parsing failure if (!lifetime) break; lifetime_bounds.push_back (std::move (lifetime.value ())); /* plus is maybe not allowed at end - spec defines it weirdly, so * assuming allowed at end */ if (lexer.peek_token ()->get_id () != PLUS) break; lexer.skip_token (); } lifetime_bounds.shrink_to_fit (); return lifetime_bounds; } // Parses lifetime bounds, with added check for ending token. template template std::vector Parser::parse_lifetime_bounds (EndTokenPred is_end_token) { std::vector lifetime_bounds; while (!is_end_token (lexer.peek_token ()->get_id ())) { auto lifetime = parse_lifetime (false); if (!lifetime) { /* TODO: is it worth throwing away all lifetime bound info just * because one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime in lifetime bounds"); add_error (std::move (error)); return {}; } lifetime_bounds.push_back (std::move (lifetime.value ())); /* plus is maybe not allowed at end - spec defines it weirdly, so * assuming allowed at end */ if (lexer.peek_token ()->get_id () != PLUS) break; lexer.skip_token (); } lifetime_bounds.shrink_to_fit (); return lifetime_bounds; } /* Parses a lifetime token (named, 'static, or '_). Also handles lifetime not * existing. 
*/ template tl::expected Parser::parse_lifetime (bool allow_elided) { const_TokenPtr lifetime_tok = lexer.peek_token (); if (lifetime_tok->get_id () != LIFETIME) { if (allow_elided) { return AST::Lifetime::elided (); } else { return tl::make_unexpected ({}); } } lexer.skip_token (); return lifetime_from_token (lifetime_tok); } template AST::Lifetime Parser::lifetime_from_token (const_TokenPtr tok) { location_t locus = tok->get_locus (); std::string lifetime_ident = tok->get_str (); if (lifetime_ident == "static") { return AST::Lifetime (AST::Lifetime::STATIC, "", locus); } else if (lifetime_ident == "_") { // Explicitly and implicitly elided lifetimes follow the same rules. return AST::Lifetime (AST::Lifetime::WILDCARD, "", locus); } else { return AST::Lifetime (AST::Lifetime::NAMED, std::move (lifetime_ident), locus); } } template std::unique_ptr Parser::parse_external_type_item (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (TYPE); const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); if (alias_name_tok == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse identifier in external opaque type"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } if (!skip_token (SEMICOLON)) return nullptr; return std::unique_ptr ( new AST::ExternalTypeItem (alias_name_tok->get_str (), std::move (vis), std::move (outer_attrs), std::move (locus))); } // Parses a "type alias" (typedef) item. 
template std::unique_ptr Parser::parse_type_alias (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (TYPE); // TODO: use this token for identifier when finished that const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); if (alias_name_tok == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse identifier in type alias"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } Identifier alias_name{alias_name_tok}; // parse generic params, which may not exist std::vector> generic_params = parse_generic_params_in_angles (); // parse where clause, which may not exist AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (EQUAL)) { skip_after_semicolon (); return nullptr; } std::unique_ptr type_to_alias = parse_type (); if (!skip_token (SEMICOLON)) { // should be skipping past this, not the next line return nullptr; } return std::unique_ptr ( new AST::TypeAlias (std::move (alias_name), std::move (generic_params), std::move (where_clause), std::move (type_to_alias), std::move (vis), std::move (outer_attrs), locus)); } // Parse a struct item AST node. template std::unique_ptr Parser::parse_struct (AST::Visibility vis, AST::AttrVec outer_attrs) { /* TODO: determine best way to parse the proper struct vs tuple struct - * share most of initial constructs so lookahead might be impossible, and if * not probably too expensive. Best way is probably unified parsing for the * initial parts and then pass them in as params to more derived functions. * Alternatively, just parse everything in this one function - do this if * function not too long. */ /* Proper struct <- 'struct' IDENTIFIER generic_params? where_clause? ( '{' * struct_fields? '}' | ';' ) */ /* Tuple struct <- 'struct' IDENTIFIER generic_params? '(' tuple_fields? ')' * where_clause? 
';' */ location_t locus = lexer.peek_token ()->get_locus (); skip_token (STRUCT_KW); // parse struct name const_TokenPtr name_tok = expect_token (IDENTIFIER); if (name_tok == nullptr) { // skip after somewhere? return nullptr; } Identifier struct_name{name_tok}; // parse generic params, which may or may not exist std::vector> generic_params = parse_generic_params_in_angles (); // branch on next token - determines whether proper struct or tuple struct if (lexer.peek_token ()->get_id () == LEFT_PAREN) { // tuple struct // skip left parenthesis lexer.skip_token (); // parse tuple fields std::vector tuple_fields; // Might be empty tuple for unit tuple struct. if (lexer.peek_token ()->get_id () == RIGHT_PAREN) tuple_fields = std::vector (); else tuple_fields = parse_tuple_fields (); // tuple parameters must have closing parenthesis if (!skip_token (RIGHT_PAREN)) { skip_after_semicolon (); return nullptr; } // parse where clause, which is optional AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (SEMICOLON)) { // can't skip after semicolon because it's meant to be here return nullptr; } return std::unique_ptr ( new AST::TupleStruct (std::move (tuple_fields), std::move (struct_name), std::move (generic_params), std::move (where_clause), std::move (vis), std::move (outer_attrs), locus)); } // assume it is a proper struct being parsed and continue outside of switch // - label only here to suppress warning // parse where clause, which is optional AST::WhereClause where_clause = parse_where_clause (); // branch on next token - determines whether struct is a unit struct const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_CURLY: { // struct with body // skip curly bracket lexer.skip_token (); // parse struct fields, if any std::vector struct_fields = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { // skip somewhere? 
return nullptr; } return std::unique_ptr (new AST::StructStruct ( std::move (struct_fields), std::move (struct_name), std::move (generic_params), std::move (where_clause), false, std::move (vis), std::move (outer_attrs), locus)); } case SEMICOLON: // unit struct declaration lexer.skip_token (); return std::unique_ptr ( new AST::StructStruct (std::move (struct_name), std::move (generic_params), std::move (where_clause), std::move (vis), std::move (outer_attrs), locus)); default: add_error (Error (t->get_locus (), "unexpected token %qs in struct declaration", t->get_token_description ())); // skip somewhere? return nullptr; } } // Parses struct fields in struct declarations. template std::vector Parser::parse_struct_fields () { std::vector fields; AST::StructField initial_field = parse_struct_field (); // Return empty field list if no field there if (initial_field.is_error ()) return fields; fields.push_back (std::move (initial_field)); while (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); AST::StructField field = parse_struct_field (); if (field.is_error ()) { // would occur with trailing comma, so allowed break; } fields.push_back (std::move (field)); } fields.shrink_to_fit (); return fields; // TODO: template if possible (parse_non_ptr_seq) } // Parses struct fields in struct declarations. template template std::vector Parser::parse_struct_fields (EndTokenPred is_end_tok) { std::vector fields; AST::StructField initial_field = parse_struct_field (); // Return empty field list if no field there if (initial_field.is_error ()) return fields; fields.push_back (std::move (initial_field)); while (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); if (is_end_tok (lexer.peek_token ()->get_id ())) break; AST::StructField field = parse_struct_field (); if (field.is_error ()) { /* TODO: should every field be ditched just because one couldn't be * parsed? 
*/ Error error (lexer.peek_token ()->get_locus (), "failed to parse struct field in struct fields"); add_error (std::move (error)); return {}; } fields.push_back (std::move (field)); } fields.shrink_to_fit (); return fields; // TODO: template if possible (parse_non_ptr_seq) } // Parses a single struct field (in a struct definition). Does not parse // commas. template AST::StructField Parser::parse_struct_field () { // parse outer attributes, if they exist AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility, if it exists auto vis = parse_visibility (); if (!vis) return AST::StructField::create_error (); location_t locus = lexer.peek_token ()->get_locus (); // parse field name const_TokenPtr field_name_tok = lexer.peek_token (); if (field_name_tok->get_id () != IDENTIFIER) { // if not identifier, assumes there is no struct field and exits - not // necessarily error return AST::StructField::create_error (); } Identifier field_name{field_name_tok}; lexer.skip_token (); if (!skip_token (COLON)) { // skip after somewhere? return AST::StructField::create_error (); } // parse field type - this is required std::unique_ptr field_type = parse_type (); if (field_type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in struct field definition"); add_error (std::move (error)); // skip after somewhere return AST::StructField::create_error (); } return AST::StructField (std::move (field_name), std::move (field_type), std::move (vis.value ()), locus, std::move (outer_attrs)); } // Parses tuple fields in tuple/tuple struct declarations. template std::vector Parser::parse_tuple_fields () { std::vector fields; AST::TupleField initial_field = parse_tuple_field (); // Return empty field list if no field there if (initial_field.is_error ()) { return fields; } fields.push_back (std::move (initial_field)); // maybe think of a better control structure here - do-while with an initial // error state? 
basically, loop through field list until can't find any more // params HACK: all current syntax uses of tuple fields have them ending // with a right paren token const_TokenPtr t = lexer.peek_token (); while (t->get_id () == COMMA) { // skip comma if applies - e.g. trailing comma lexer.skip_token (); // break out due to right paren if it exists if (lexer.peek_token ()->get_id () == RIGHT_PAREN) { break; } AST::TupleField field = parse_tuple_field (); if (field.is_error ()) { Error error (lexer.peek_token ()->get_locus (), "failed to parse tuple field in tuple fields"); add_error (std::move (error)); return std::vector (); } fields.push_back (std::move (field)); t = lexer.peek_token (); } fields.shrink_to_fit (); return fields; // TODO: this shares basically all code with function params and struct // fields // - templates? } /* Parses a single tuple struct field in a tuple struct definition. Does not * parse commas. */ template AST::TupleField Parser::parse_tuple_field () { // parse outer attributes if they exist AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility if it exists auto visibility = parse_visibility (); if (!visibility) return AST::TupleField::create_error (); location_t locus = lexer.peek_token ()->get_locus (); // parse type, which is required std::unique_ptr field_type = parse_type (); if (field_type == nullptr) { // error if null Error error (lexer.peek_token ()->get_locus (), "could not parse type in tuple struct field"); add_error (std::move (error)); // skip after something return AST::TupleField::create_error (); } return AST::TupleField (std::move (field_type), std::move (visibility.value ()), locus, std::move (outer_attrs)); } // Parses a Rust "enum" tagged union item definition. 
template std::unique_ptr Parser::parse_enum (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (ENUM_KW); // parse enum name const_TokenPtr enum_name_tok = expect_token (IDENTIFIER); if (enum_name_tok == nullptr) return nullptr; Identifier enum_name = {enum_name_tok}; // parse generic params (of enum container, not enum variants) if they exist std::vector> generic_params = parse_generic_params_in_angles (); // parse where clause if it exists AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } // parse actual enum variant definitions std::vector> enum_items = parse_enum_items ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { skip_after_end_block (); return nullptr; } return std::unique_ptr ( new AST::Enum (std::move (enum_name), std::move (vis), std::move (generic_params), std::move (where_clause), std::move (enum_items), std::move (outer_attrs), locus)); } // Parses the enum variants inside an enum definiton. template std::vector> Parser::parse_enum_items () { std::vector> items; auto initial_item = parse_enum_item (); // Return empty item list if no field there if (!initial_item) return items; items.push_back (std::move (initial_item.value ())); while (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); auto item = parse_enum_item (); if (!item) { // this would occur with a trailing comma, which is allowed break; } items.push_back (std::move (item.value ())); } items.shrink_to_fit (); return items; /* TODO: use template if doable (parse_non_ptr_sequence) */ } // Parses the enum variants inside an enum definiton. 
template template std::vector> Parser::parse_enum_items (EndTokenPred is_end_tok) { std::vector> items; auto initial_item = parse_enum_item (); // Return empty item list if no field there if (!initial_item) return items; items.push_back (std::move (initial_item.value ())); while (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); if (is_end_tok (lexer.peek_token ()->get_id ())) break; auto item = parse_enum_item (); if (!item) { /* TODO should this ignore all successfully parsed enum items just * because one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse enum item in enum items"); add_error (std::move (error)); return {}; } items.push_back (std::move (item.value ())); } items.shrink_to_fit (); return items; /* TODO: use template if doable (parse_non_ptr_sequence) */ } /* Parses a single enum variant item in an enum definition. Does not parse * commas. */ template tl::expected, Parse::Error::EnumVariant> Parser::parse_enum_item () { // parse outer attributes if they exist AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility, which may or may not exist auto vis_res = parse_visibility (); if (!vis_res) return Parse::Error::EnumVariant::make_child_error (); auto vis = vis_res.value (); // parse name for enum item, which is required const_TokenPtr item_name_tok = lexer.peek_token (); if (item_name_tok->get_id () != IDENTIFIER) { // this may not be an error but it means there is no enum item here return Parse::Error::EnumVariant::make_not_identifier (item_name_tok); } lexer.skip_token (); Identifier item_name{item_name_tok}; // branch based on next token const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_PAREN: { // tuple enum item lexer.skip_token (); std::vector tuple_fields; // Might be empty tuple for unit tuple enum variant. 
if (lexer.peek_token ()->get_id () == RIGHT_PAREN) tuple_fields = std::vector (); else tuple_fields = parse_tuple_fields (); if (!skip_token (RIGHT_PAREN)) { // skip after somewhere return Parse::Error::EnumVariant::make_unfinished_tuple_variant (); } return std::unique_ptr (new AST::EnumItemTuple ( std::move (item_name), std::move (vis), std::move (tuple_fields), std::move (outer_attrs), item_name_tok->get_locus ())); } case LEFT_CURLY: { // struct enum item lexer.skip_token (); std::vector struct_fields = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { // skip after somewhere return Parse::Error::EnumVariant::make_unfinished_tuple_variant (); } return std::unique_ptr (new AST::EnumItemStruct ( std::move (item_name), std::move (vis), std::move (struct_fields), std::move (outer_attrs), item_name_tok->get_locus ())); } case EQUAL: { // discriminant enum item lexer.skip_token (); auto discriminant_expr = parse_expr (); if (!discriminant_expr) return Parse::Error::EnumVariant::make_child_error (); return std::make_unique ( std::move (item_name), std::move (vis), std::move (discriminant_expr.value ()), std::move (outer_attrs), item_name_tok->get_locus ()); } default: // regular enum with just an identifier return std::make_unique (std::move (item_name), std::move (vis), std::move (outer_attrs), item_name_tok->get_locus ()); } } // Parses a C-style (and C-compat) untagged union declaration. 
template std::unique_ptr Parser::parse_union (AST::Visibility vis, AST::AttrVec outer_attrs) { /* hack - "weak keyword" by finding identifier called "union" (lookahead in * item switch) */ const_TokenPtr union_keyword = expect_token (IDENTIFIER); rust_assert (union_keyword->get_str () == Values::WeakKeywords::UNION); location_t locus = union_keyword->get_locus (); // parse actual union name const_TokenPtr union_name_tok = expect_token (IDENTIFIER); if (union_name_tok == nullptr) { skip_after_next_block (); return nullptr; } Identifier union_name{union_name_tok}; // parse optional generic parameters std::vector> generic_params = parse_generic_params_in_angles (); // parse optional where clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } /* parse union inner items as "struct fields" because hey, syntax reuse. * Spec said so. */ std::vector union_fields = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { // skip after somewhere return nullptr; } return std::unique_ptr ( new AST::Union (std::move (union_name), std::move (vis), std::move (generic_params), std::move (where_clause), std::move (union_fields), std::move (outer_attrs), locus)); } /* Parses a "constant item" (compile-time constant to maybe "inline" * throughout the program - like constexpr). 
*/ template std::unique_ptr Parser::parse_const_item (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (CONST); /* get constant identifier - this is either a proper identifier or the _ * wildcard */ const_TokenPtr ident_tok = lexer.peek_token (); // make default identifier the underscore wildcard one std::string ident (Values::Keywords::UNDERSCORE); switch (ident_tok->get_id ()) { case IDENTIFIER: ident = ident_tok->get_str (); lexer.skip_token (); break; case UNDERSCORE: // do nothing - identifier is already "_" lexer.skip_token (); break; default: add_error ( Error (ident_tok->get_locus (), "expected item name (identifier or %<_%>) in constant item " "declaration - found %qs", ident_tok->get_token_description ())); skip_after_semicolon (); return nullptr; } if (!skip_token (COLON)) { skip_after_semicolon (); return nullptr; } // parse constant type (required) std::unique_ptr type = parse_type (); // A const with no given expression value if (lexer.peek_token ()->get_id () == SEMICOLON) { lexer.skip_token (); return std::unique_ptr ( new AST::ConstantItem (std::move (ident), std::move (vis), std::move (type), std::move (outer_attrs), locus)); } if (!skip_token (EQUAL)) { skip_after_semicolon (); return nullptr; } // parse constant expression (required) auto expr = parse_expr (); if (!expr) return nullptr; if (!skip_token (SEMICOLON)) { // skip somewhere? return nullptr; } return std::unique_ptr ( new AST::ConstantItem (std::move (ident), std::move (vis), std::move (type), std::move (expr.value ()), std::move (outer_attrs), locus)); } // Parses a "static item" (static storage item, with 'static lifetime). 
template std::unique_ptr Parser::parse_static_item (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (STATIC_KW); // determine whether static item is mutable bool is_mut = false; if (lexer.peek_token ()->get_id () == MUT) { is_mut = true; lexer.skip_token (); } const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident{ident_tok}; if (!skip_token (COLON)) { skip_after_semicolon (); return nullptr; } // parse static item type (required) std::unique_ptr type = parse_type (); if (!skip_token (EQUAL)) { skip_after_semicolon (); return nullptr; } // parse static item expression (required) auto expr = parse_expr (); if (!expr) return nullptr; if (!skip_token (SEMICOLON)) { // skip after somewhere return nullptr; } return std::unique_ptr ( new AST::StaticItem (std::move (ident), is_mut, std::move (type), std::move (expr.value ()), std::move (vis), std::move (outer_attrs), locus)); } // Parses a trait definition item, including unsafe ones. 
template std::unique_ptr Parser::parse_trait (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); bool is_unsafe = false; bool is_auto_trait = false; if (lexer.peek_token ()->get_id () == UNSAFE) { is_unsafe = true; lexer.skip_token (); } if (lexer.peek_token ()->get_id () == AUTO) { is_auto_trait = true; lexer.skip_token (); } skip_token (TRAIT); // parse trait name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident{ident_tok}; // parse generic parameters (if they exist) std::vector> generic_params = parse_generic_params_in_angles (); // create placeholder type param bounds in case they don't exist std::vector> type_param_bounds; // parse type param bounds (if they exist) if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); type_param_bounds = parse_type_param_bounds ( [] (TokenId id) { return id == WHERE || id == LEFT_CURLY; }); // type_param_bounds = parse_type_param_bounds (); } // parse where clause (if it exists) AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } // parse inner attrs (if they exist) AST::AttrVec inner_attrs = parse_inner_attributes (); // parse trait items std::vector> trait_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr trait_item = parse_trait_item (); if (trait_item == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse trait item in trait"); add_error (std::move (error)); return nullptr; } trait_items.push_back (std::move (trait_item)); t = lexer.peek_token (); } if (!skip_token (RIGHT_CURLY)) { // skip after something return nullptr; } trait_items.shrink_to_fit (); return std::unique_ptr ( new AST::Trait (std::move (ident), is_unsafe, is_auto_trait, std::move (generic_params), std::move (type_param_bounds), std::move (where_clause), std::move (trait_items), 
std::move (vis), std::move (outer_attrs), std::move (inner_attrs), locus)); } // Parses a trait item used inside traits (not trait, the Item). template std::unique_ptr Parser::parse_trait_item () { // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); auto vis_res = parse_visibility (); if (!vis_res) return nullptr; auto vis = vis_res.value (); // lookahead to determine what type of trait item to parse const_TokenPtr tok = lexer.peek_token (); switch (tok->get_id ()) { case SUPER: case SELF: case CRATE: case DOLLAR_SIGN: // these seem to be SimplePath tokens, so this is a macro invocation // semi return parse_macro_invocation_semi (std::move (outer_attrs)); case IDENTIFIER: if (lexer.peek_token ()->get_str () == Values::WeakKeywords::DEFAULT) return parse_function (std::move (vis), std::move (outer_attrs)); else return parse_macro_invocation_semi (std::move (outer_attrs)); case TYPE: return parse_trait_type (std::move (outer_attrs), vis); case CONST: // disambiguate with function qualifier if (lexer.peek_token (1)->get_id () == IDENTIFIER) { return parse_trait_const (std::move (outer_attrs)); } // else, fallthrough to function // TODO: find out how to disable gcc "implicit fallthrough" error gcc_fallthrough (); case ASYNC: case UNSAFE: case EXTERN_KW: case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs)); default: break; } add_error (Error (tok->get_locus (), "unrecognised token %qs for item in trait", tok->get_token_description ())); // skip? return nullptr; } // Parse a typedef trait item. 
template std::unique_ptr Parser::parse_trait_type (AST::AttrVec outer_attrs, AST::Visibility vis) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (TYPE); const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident{ident_tok}; // Parse optional generic parameters for GATs (Generic Associated Types) std::vector> generic_params; if (lexer.peek_token ()->get_id () == LEFT_ANGLE) { generic_params = parse_generic_params_in_angles (); } std::vector> bounds; // parse optional colon if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse optional type param bounds bounds = parse_type_param_bounds ([] (TokenId id) { return id == SEMICOLON; }); // bounds = parse_type_param_bounds (); } if (!skip_token (SEMICOLON)) { // skip? return nullptr; } return std::unique_ptr ( new AST::TraitItemType (std::move (ident), std::move (generic_params), std::move (bounds), std::move (outer_attrs), vis, locus)); } // Parses a constant trait item. template std::unique_ptr Parser::parse_trait_const (AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (CONST); // parse constant item name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident{ident_tok}; if (!skip_token (COLON)) { skip_after_semicolon (); return nullptr; } // parse constant trait item type std::unique_ptr type = parse_type (); // parse constant trait body expression, if it exists std::unique_ptr const_body = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); // expression must exist, so parse it auto expr = parse_expr (); if (!expr) return nullptr; const_body = std::move (expr.value ()); } if (!skip_token (SEMICOLON)) { // skip after something? 
return nullptr; } return std::unique_ptr (new AST::ConstantItem ( std::move (ident), AST::Visibility::create_private (), std::move (type), std::move (const_body), std::move (outer_attrs), locus)); } /* Parses a struct "impl" item (both inherent impl and trait impl can be * parsed here), */ template std::unique_ptr Parser::parse_impl (AST::Visibility vis, AST::AttrVec outer_attrs) { /* Note that only trait impls are allowed to be unsafe. So if unsafe, it * must be a trait impl. However, this isn't enough for full disambiguation, * so don't branch here. */ location_t locus = lexer.peek_token ()->get_locus (); bool is_unsafe = false; if (lexer.peek_token ()->get_id () == UNSAFE) { lexer.skip_token (); is_unsafe = true; } if (!skip_token (IMPL)) { skip_after_next_block (); return nullptr; } // parse generic params (shared by trait and inherent impls) std::vector> generic_params = parse_generic_params_in_angles (); // Again, trait impl-only feature, but optional one, so can be used for // branching yet. bool has_exclam = false; if (lexer.peek_token ()->get_id () == EXCLAM) { lexer.skip_token (); has_exclam = true; } /* FIXME: code that doesn't look shit for TypePath. Also, make sure this * doesn't parse too much and not work. 
*/ AST::TypePath type_path = parse_type_path (); if (type_path.is_error () || lexer.peek_token ()->get_id () != FOR) { /* cannot parse type path (or not for token next, at least), so must be * inherent impl */ // hacky conversion of TypePath stack object to Type pointer std::unique_ptr type = nullptr; if (!type_path.is_error ()) type = std::unique_ptr ( new AST::TypePath (std::move (type_path))); else type = parse_type (); // Type is required, so error if null if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in inherent impl"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse optional where clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { // TODO: does this still skip properly? skip_after_end_block (); return nullptr; } // parse inner attributes (optional) AST::AttrVec inner_attrs = parse_inner_attributes (); // parse inherent impl items std::vector> impl_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr impl_item = parse_inherent_impl_item (); if (impl_item == nullptr) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse inherent impl item in inherent impl"); add_error (std::move (error)); return nullptr; } impl_items.push_back (std::move (impl_item)); t = lexer.peek_token (); } if (!skip_token (RIGHT_CURLY)) { // skip somewhere return nullptr; } // DEBUG rust_debug ("successfully parsed inherent impl"); impl_items.shrink_to_fit (); return std::unique_ptr (new AST::InherentImpl ( std::move (impl_items), std::move (generic_params), std::move (type), std::move (where_clause), std::move (vis), std::move (inner_attrs), std::move (outer_attrs), locus)); } else { // type path must both be valid and next token is for, so trait impl if (!skip_token (FOR)) { skip_after_next_block (); return nullptr; } // parse type std::unique_ptr type = parse_type (); // ensure type is included as it 
is required if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in trait impl"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse optional where clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { // TODO: does this still skip properly? skip_after_end_block (); return nullptr; } // parse inner attributes (optional) AST::AttrVec inner_attrs = parse_inner_attributes (); // parse trait impl items std::vector> impl_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr impl_item = parse_trait_impl_item (); if (impl_item == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse trait impl item in trait impl"); add_error (std::move (error)); return nullptr; } impl_items.push_back (std::move (impl_item)); t = lexer.peek_token (); // DEBUG rust_debug ("successfully parsed a trait impl item"); } // DEBUG rust_debug ("successfully finished trait impl items"); if (!skip_token (RIGHT_CURLY)) { // skip somewhere return nullptr; } // DEBUG rust_debug ("successfully parsed trait impl"); impl_items.shrink_to_fit (); return std::unique_ptr ( new AST::TraitImpl (std::move (type_path), is_unsafe, has_exclam, std::move (impl_items), std::move (generic_params), std::move (type), std::move (where_clause), std::move (vis), std::move (inner_attrs), std::move (outer_attrs), locus)); } } // Parses a single inherent impl item (item inside an inherent impl block). template std::unique_ptr Parser::parse_inherent_impl_item () { // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); // TODO: cleanup - currently an unreadable mess // branch on next token: const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: // FIXME: Arthur: Do we need to some lookahead here? 
return parse_macro_invocation_semi (outer_attrs); case SUPER: case SELF: case CRATE: case PUB: { // visibility, so not a macro invocation semi - must be constant, // function, or method auto vis_res = parse_visibility (); if (!vis_res) return nullptr; auto vis = vis_res.value (); // TODO: is a recursive call to parse_inherent_impl_item better? switch (lexer.peek_token ()->get_id ()) { case EXTERN_KW: case UNSAFE: case FN_KW: // function or method return parse_inherent_impl_function_or_method (std::move (vis), std::move ( outer_attrs)); case CONST: // lookahead to resolve production - could be function/method or // const item t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (std::move (vis), std::move (outer_attrs)); case UNSAFE: case EXTERN_KW: case FN_KW: return parse_inherent_impl_function_or_method (std::move (vis), std::move ( outer_attrs)); default: add_error (Error (t->get_locus (), "unexpected token %qs in some sort of const " "item in inherent impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } default: add_error ( Error (t->get_locus (), "unrecognised token %qs for item in inherent impl", t->get_token_description ())); // skip? 
return nullptr; } } case ASYNC: case EXTERN_KW: case UNSAFE: case FN_KW: // function or method return parse_inherent_impl_function_or_method ( AST::Visibility::create_private (), std::move (outer_attrs)); case CONST: /* lookahead to resolve production - could be function/method or const * item */ t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (AST::Visibility::create_private (), std::move (outer_attrs)); case UNSAFE: case EXTERN_KW: case FN_KW: return parse_inherent_impl_function_or_method ( AST::Visibility::create_private (), std::move (outer_attrs)); default: add_error (Error (t->get_locus (), "unexpected token %qs in some sort of const item " "in inherent impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } rust_unreachable (); default: add_error (Error (t->get_locus (), "unrecognised token %qs for item in inherent impl", t->get_token_description ())); // skip? return nullptr; } } /* For internal use only by parse_inherent_impl_item() - splits giant method * into smaller ones and prevents duplication of logic. Strictly, this parses * a function or method item inside an inherent impl item block. */ // TODO: make this a templated function with "return type" as type param - // InherentImplItem is this specialisation of the template while TraitImplItem // will be the other. 
template std::unique_ptr Parser::parse_inherent_impl_function_or_method ( AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); // parse function or method qualifiers auto qualifiers = parse_function_qualifiers (); if (!qualifiers) return nullptr; skip_token (FN_KW); // parse function or method name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident{ident_tok}; // parse generic params std::vector> generic_params = parse_generic_params_in_angles (); if (!skip_token (LEFT_PAREN)) { // skip after somewhere? return nullptr; } // now for function vs method disambiguation - method has opening "self" // param auto initial_param = parse_self_param (); if (!initial_param.has_value () && initial_param.error ().kind != Parse::Error::Self::Kind::NOT_SELF) return nullptr; /* FIXME: ensure that self param doesn't accidently consume tokens for a * function one idea is to lookahead up to 4 tokens to see whether self is * one of them */ bool is_method = false; if (initial_param.has_value ()) { if ((*initial_param)->is_self ()) is_method = true; /* skip comma so function and method regular params can be parsed in * same way */ if (lexer.peek_token ()->get_id () == COMMA) lexer.skip_token (); } // parse trait function params std::vector> function_params = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); if (initial_param.has_value ()) function_params.insert (function_params.begin (), std::move (*initial_param)); if (!skip_token (RIGHT_PAREN)) { skip_after_end_block (); return nullptr; } // parse return type (optional) std::unique_ptr return_type = parse_function_return_type (); // parse where clause (optional) AST::WhereClause where_clause = parse_where_clause (); tl::optional> body = tl::nullopt; if (lexer.peek_token ()->get_id () == SEMICOLON) lexer.skip_token (); else { auto result = parse_block_expr (); if (!result) { Error error ( lexer.peek_token 
()->get_locus (), "could not parse definition in inherent impl %s definition", is_method ? "method" : "function"); add_error (std::move (error)); skip_after_end_block (); return nullptr; } body = std::move (result.value ()); } return std::unique_ptr ( new AST::Function (std::move (ident), std::move (qualifiers.value ()), std::move (generic_params), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus)); } // Parses a single trait impl item (item inside a trait impl block). template std::unique_ptr Parser::parse_trait_impl_item () { // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); auto vis_res = parse_visibility (); if (!vis_res) return nullptr; auto visibility = vis_res.value (); // branch on next token: const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case SUPER: case SELF: case CRATE: case DOLLAR_SIGN: // these seem to be SimplePath tokens, so this is a macro invocation // semi return parse_macro_invocation_semi (std::move (outer_attrs)); case IDENTIFIER: if (lexer.peek_token ()->get_str () == Values::WeakKeywords::DEFAULT) return parse_trait_impl_function_or_method (visibility, std::move (outer_attrs)); else return parse_macro_invocation_semi (std::move (outer_attrs)); case TYPE: return parse_type_alias (visibility, std::move (outer_attrs)); case EXTERN_KW: case UNSAFE: case FN_KW: // function or method return parse_trait_impl_function_or_method (visibility, std::move (outer_attrs)); case ASYNC: return parse_async_item (visibility, std::move (outer_attrs)); case CONST: // lookahead to resolve production - could be function/method or const // item t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (visibility, std::move (outer_attrs)); case UNSAFE: case EXTERN_KW: case FN_KW: return parse_trait_impl_function_or_method (visibility, std::move 
(outer_attrs)); default: add_error (Error ( t->get_locus (), "unexpected token %qs in some sort of const item in trait impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } rust_unreachable (); default: break; } add_error (Error (t->get_locus (), "unrecognised token %qs for item in trait impl", t->get_token_description ())); // skip? return nullptr; } /* For internal use only by parse_trait_impl_item() - splits giant method into * smaller ones and prevents duplication of logic. Strictly, this parses a * function or method item inside a trait impl item block. */ template std::unique_ptr Parser::parse_trait_impl_function_or_method ( AST::Visibility vis, AST::AttrVec outer_attrs) { // this shares virtually all logic with // parse_inherent_impl_function_or_method // - template? location_t locus = lexer.peek_token ()->get_locus (); // parse function or method qualifiers auto qualifiers = parse_function_qualifiers (); if (!qualifiers) return nullptr; skip_token (FN_KW); // parse function or method name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { return nullptr; } Identifier ident{ident_tok}; // DEBUG: rust_debug ( "about to start parsing generic params in trait impl function or method"); // parse generic params std::vector> generic_params = parse_generic_params_in_angles (); // DEBUG: rust_debug ( "finished parsing generic params in trait impl function or method"); if (!skip_token (LEFT_PAREN)) { // skip after somewhere? 
return nullptr; } // now for function vs method disambiguation - method has opening "self" // param auto initial_param = parse_self_param (); if (!initial_param.has_value () && initial_param.error ().kind != Parse::Error::Self::Kind::NOT_SELF) return nullptr; // FIXME: ensure that self param doesn't accidently consume tokens for a // function bool is_method = false; if (initial_param.has_value ()) { if ((*initial_param)->is_self ()) is_method = true; // skip comma so function and method regular params can be parsed in // same way if (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); } // DEBUG rust_debug ("successfully parsed self param in method trait impl item"); } // DEBUG rust_debug ( "started to parse function params in function or method trait impl item"); // parse trait function params (only if next token isn't right paren) std::vector> function_params; if (lexer.peek_token ()->get_id () != RIGHT_PAREN) { function_params = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); if (function_params.empty ()) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse function params in trait impl %s definition", is_method ? 
"method" : "function"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } } if (initial_param.has_value ()) function_params.insert (function_params.begin (), std::move (*initial_param)); // DEBUG rust_debug ("successfully parsed function params in function or method " "trait impl item"); if (!skip_token (RIGHT_PAREN)) { skip_after_next_block (); return nullptr; } // parse return type (optional) std::unique_ptr return_type = parse_function_return_type (); // DEBUG rust_debug ( "successfully parsed return type in function or method trait impl item"); // parse where clause (optional) AST::WhereClause where_clause = parse_where_clause (); // DEBUG rust_debug ( "successfully parsed where clause in function or method trait impl item"); // parse function definition (in block) - semicolon not allowed tl::optional> body = tl::nullopt; if (lexer.peek_token ()->get_id () == SEMICOLON) lexer.skip_token (); else { auto result = parse_block_expr (); if (!result) { Error error (lexer.peek_token ()->get_locus (), "could not parse definition in trait impl %s definition", is_method ? "method" : "function"); add_error (std::move (error)); skip_after_end_block (); return nullptr; } body = std::move (result.value ()); } return std::unique_ptr ( new AST::Function (std::move (ident), std::move (qualifiers.value ()), std::move (generic_params), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus)); } // Parses an extern block of declarations. 
template std::unique_ptr Parser::parse_extern_block (AST::Visibility vis, AST::AttrVec outer_attrs) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (EXTERN_KW); // detect optional abi name std::string abi; const_TokenPtr next_tok = lexer.peek_token (); if (next_tok->get_id () == STRING_LITERAL) { lexer.skip_token (); abi = next_tok->get_str (); } if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } AST::AttrVec inner_attrs = parse_inner_attributes (); // parse declarations inside extern block std::vector> extern_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr extern_item = parse_external_item (); if (extern_item == nullptr) { Error error (t->get_locus (), "failed to parse external item despite not reaching " "end of extern block"); add_error (std::move (error)); return nullptr; } extern_items.push_back (std::move (extern_item)); t = lexer.peek_token (); } if (!skip_token (RIGHT_CURLY)) { // skip somewhere return nullptr; } extern_items.shrink_to_fit (); return std::unique_ptr ( new AST::ExternBlock (std::move (abi), std::move (extern_items), std::move (vis), std::move (inner_attrs), std::move (outer_attrs), locus)); } // Parses a single extern block item (static or function declaration). 
template std::unique_ptr Parser::parse_external_item () { // parse optional outer attributes AST::AttrVec outer_attrs = parse_outer_attributes (); location_t locus = lexer.peek_token ()->get_locus (); // parse optional visibility auto vis_res = parse_visibility (); if (!vis_res) return nullptr; auto vis = vis_res.value (); const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: return parse_macro_invocation_semi (outer_attrs); case STATIC_KW: { // parse extern static item lexer.skip_token (); // parse mut (optional) bool has_mut = false; if (lexer.peek_token ()->get_id () == MUT) { lexer.skip_token (); has_mut = true; } // parse identifier const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { skip_after_semicolon (); return nullptr; } Identifier ident{ident_tok}; if (!skip_token (COLON)) { skip_after_semicolon (); return nullptr; } // parse type (required) std::unique_ptr type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in external static item"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } if (!skip_token (SEMICOLON)) { // skip after somewhere? return nullptr; } return std::unique_ptr ( new AST::ExternalStaticItem (std::move (ident), std::move (type), has_mut, std::move (vis), std::move (outer_attrs), locus)); } case FN_KW: return parse_function (std::move (vis), std::move (outer_attrs), true); case TYPE: return parse_external_type_item (std::move (vis), std::move (outer_attrs)); default: // error add_error ( Error (t->get_locus (), "unrecognised token %qs in extern block item declaration", t->get_token_description ())); skip_after_semicolon (); return nullptr; } } // Parses a statement (will further disambiguate any statement). template std::unique_ptr Parser::parse_stmt (ParseRestrictions restrictions) { // quick exit for empty statement // FIXME: Can we have empty statements without semicolons? Just nothing? 
const_TokenPtr t = lexer.peek_token (); if (t->get_id () == SEMICOLON) { lexer.skip_token (); return std::unique_ptr ( new AST::EmptyStmt (t->get_locus ())); } // parse outer attributes AST::AttrVec outer_attrs = parse_outer_attributes (); // parsing this will be annoying because of the many different possibilities /* best may be just to copy paste in parse_item switch, and failing that try * to parse outer attributes, and then pass them in to either a let * statement or (fallback) expression statement. */ // FIXME: think of a way to do this without such a large switch? t = lexer.peek_token (); switch (t->get_id ()) { case LET: // let statement return parse_let_stmt (std::move (outer_attrs), restrictions); case PUB: case MOD: case EXTERN_KW: case USE: case FN_KW: case TYPE: case STRUCT_KW: case ENUM_KW: case CONST: case STATIC_KW: case AUTO: case TRAIT: case IMPL: case MACRO: /* TODO: implement union keyword but not really because of * context-dependence crappy hack way to parse a union written below to * separate it from the good code. */ // case UNION: case UNSAFE: // maybe - unsafe traits are a thing /* if any of these (should be all possible VisItem prefixes), parse a * VisItem can't parse item because would require reparsing outer * attributes */ // may also be unsafe block if (lexer.peek_token (1)->get_id () == LEFT_CURLY) { return parse_expr_stmt (std::move (outer_attrs), restrictions); } else { return parse_vis_item (std::move (outer_attrs)); } break; // crappy hack to do union "keyword" case IDENTIFIER: if (t->get_str () == Values::WeakKeywords::UNION && lexer.peek_token (1)->get_id () == IDENTIFIER) { return parse_vis_item (std::move (outer_attrs)); // or should this go straight to parsing union? } else if (is_macro_rules_def (t)) { // macro_rules! 
macro item return parse_macro_rules_def (std::move (outer_attrs)); } gcc_fallthrough (); // TODO: find out how to disable gcc "implicit fallthrough" warning default: // fallback: expression statement return parse_expr_stmt (std::move (outer_attrs), restrictions); break; } } // Parses a let statement. template std::unique_ptr Parser::parse_let_stmt (AST::AttrVec outer_attrs, ParseRestrictions restrictions) { location_t locus = lexer.peek_token ()->get_locus (); skip_token (LET); // parse pattern (required) std::unique_ptr pattern = parse_pattern (); if (pattern == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse pattern in let statement"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } // parse type declaration (optional) std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == COLON) { // must have a type declaration lexer.skip_token (); type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in let statement"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } } // parse expression to set variable to (optional) std::unique_ptr expr = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { // must have an expression lexer.skip_token (); auto expr_res = parse_expr (); if (!expr_res) { skip_after_semicolon (); return nullptr; } expr = std::move (expr_res.value ()); } tl::optional> else_expr = tl::nullopt; if (maybe_skip_token (ELSE)) { auto block_expr = parse_block_expr (); if (block_expr) else_expr = tl::optional>{ std::move (block_expr.value ())}; else else_expr = tl::nullopt; } if (restrictions.consume_semi) { // `stmt` macro variables are parsed without a semicolon, but should be // parsed as a full statement when interpolated. This should be handled // by having the interpolated statement be distinguishable from normal // tokens, e.g. by NT tokens. 
if (restrictions.allow_close_after_expr_stmt) maybe_skip_token (SEMICOLON); else if (!skip_token (SEMICOLON)) return nullptr; } return std::unique_ptr ( new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type), std::move (else_expr), std::move (outer_attrs), locus)); } template tl::optional Parser::parse_generic_arg () { auto tok = lexer.peek_token (); std::unique_ptr expr = nullptr; switch (tok->get_id ()) { case IDENTIFIER: { // This is a bit of a weird situation: With an identifier token, we // could either have a valid type or a macro (FIXME: anything else?). So // we need one bit of lookahead to differentiate if this is really auto next_tok = lexer.peek_token (1); if (next_tok->get_id () == LEFT_ANGLE || next_tok->get_id () == SCOPE_RESOLUTION || next_tok->get_id () == EXCLAM) { auto type = parse_type (); if (type) return AST::GenericArg::create_type (std::move (type)); else return tl::nullopt; } else if (next_tok->get_id () == COLON) { lexer.skip_token (); // skip ident lexer.skip_token (); // skip colon auto tok = lexer.peek_token (); std::vector> bounds = parse_type_param_bounds (); auto type = std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), tok->get_locus (), false)); if (type) return AST::GenericArg::create_type (std::move (type)); else return tl::nullopt; } lexer.skip_token (); return AST::GenericArg::create_ambiguous (tok->get_str (), tok->get_locus ()); } case LEFT_CURLY: { auto res = parse_block_expr (); if (res) expr = std::move (res.value ()); else return tl::nullopt; } break; case MINUS: case STRING_LITERAL: case CHAR_LITERAL: case INT_LITERAL: case FLOAT_LITERAL: case TRUE_LITERAL: case FALSE_LITERAL: { auto res = parse_literal_expr (); if (res) expr = std::move (res.value ()); else return tl::nullopt; } break; // FIXME: Because of this, error reporting is garbage for const generic // parameter's default values default: { auto type = parse_type (); // FIXME: Find a better way to do this? 
if (type) return AST::GenericArg::create_type (std::move (type)); else return tl::nullopt; } } if (!expr) return tl::nullopt; return AST::GenericArg::create_const (std::move (expr)); } // Parses the generic arguments in each path segment. template AST::GenericArgs Parser::parse_path_generic_args () { if (lexer.peek_token ()->get_id () == LEFT_SHIFT) lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); if (!skip_token (LEFT_ANGLE)) { // skip after somewhere? return AST::GenericArgs::create_empty (); } // We need to parse all lifetimes, then parse types and const generics in // any order. // try to parse lifetimes first std::vector lifetime_args; const_TokenPtr t = lexer.peek_token (); location_t locus = t->get_locus (); while (!Parse::Utils::is_right_angle_tok (t->get_id ())) { auto lifetime = parse_lifetime (false); if (!lifetime) { // not necessarily an error break; } lifetime_args.push_back (std::move (lifetime.value ())); // if next token isn't comma, then it must be end of list if (lexer.peek_token ()->get_id () != COMMA) { break; } // skip comma lexer.skip_token (); t = lexer.peek_token (); } // try to parse types and const generics second std::vector generic_args; // TODO: think of better control structure t = lexer.peek_token (); while (!Parse::Utils::is_right_angle_tok (t->get_id ())) { // FIXME: Is it fine to break if there is one binding? Can't there be // bindings in between types? // ensure not binding being parsed as type accidently if (t->get_id () == IDENTIFIER && lexer.peek_token (1)->get_id () == EQUAL) break; auto arg = parse_generic_arg (); if (arg) { generic_args.emplace_back (std::move (arg.value ())); } // FIXME: Do we need to break if we encounter an error? 
// if next token isn't comma, then it must be end of list if (lexer.peek_token ()->get_id () != COMMA) break; // skip comma lexer.skip_token (); t = lexer.peek_token (); } // try to parse bindings third std::vector binding_args; // TODO: think of better control structure t = lexer.peek_token (); while (!Parse::Utils::is_right_angle_tok (t->get_id ())) { AST::GenericArgsBinding binding = parse_generic_args_binding (); if (binding.is_error ()) { // not necessarily an error break; } binding_args.push_back (std::move (binding)); // if next token isn't comma, then it must be end of list if (lexer.peek_token ()->get_id () != COMMA) { break; } // skip comma lexer.skip_token (); t = lexer.peek_token (); } // skip any trailing commas if (lexer.peek_token ()->get_id () == COMMA) lexer.skip_token (); if (!skip_generics_right_angle ()) return AST::GenericArgs::create_empty (); lifetime_args.shrink_to_fit (); generic_args.shrink_to_fit (); binding_args.shrink_to_fit (); return AST::GenericArgs (std::move (lifetime_args), std::move (generic_args), std::move (binding_args), locus); } // Parses a binding in a generic args path segment. template AST::GenericArgsBinding Parser::parse_generic_args_binding () { const_TokenPtr ident_tok = lexer.peek_token (); if (ident_tok->get_id () != IDENTIFIER) { // allow non error-inducing use // skip somewhere? return AST::GenericArgsBinding::create_error (); } lexer.skip_token (); Identifier ident{ident_tok}; if (!skip_token (EQUAL)) { // skip after somewhere? return AST::GenericArgsBinding::create_error (); } // parse type (required) std::unique_ptr type = parse_type (); if (type == nullptr) { // skip somewhere? return AST::GenericArgsBinding::create_error (); } return AST::GenericArgsBinding (std::move (ident), std::move (type), ident_tok->get_locus ()); } // Parses a self param. Also handles self param not existing. 
template tl::expected, Parse::Error::Self> Parser::parse_self_param () { bool has_reference = false; AST::Lifetime lifetime = AST::Lifetime::elided (); location_t locus = lexer.peek_token ()->get_locus (); // TODO: Feels off, find a better way to clearly express this std::vector> ptrs = {{ASTERISK, SELF} /* *self */, {ASTERISK, CONST, SELF} /* *const self */, {ASTERISK, MUT, SELF} /* *mut self */}; for (auto &s : ptrs) { size_t i = 0; for (i = 0; i < s.size (); i++) if (lexer.peek_token (i)->get_id () != s[i]) break; if (i == s.size ()) { Error error (lexer.peek_token ()->get_locus (), "cannot pass % by raw pointer"); add_error (std::move (error)); return Parse::Error::Self::make_self_raw_pointer (); } } // Trying to find those patterns: // // &'lifetime mut self // &'lifetime self // & mut self // & self // mut self // self // // If not found, it is probably a function, exit and let function parsing // handle it. bool is_self = false; for (size_t i = 0; i < 5; i++) if (lexer.peek_token (i)->get_id () == SELF) is_self = true; if (!is_self) return Parse::Error::Self::make_not_self (); // test if self is a reference parameter if (lexer.peek_token ()->get_id () == AMP) { has_reference = true; lexer.skip_token (); // now test whether it has a lifetime if (lexer.peek_token ()->get_id () == LIFETIME) { // something went wrong somehow if (auto parsed_lifetime = parse_lifetime (true)) { lifetime = parsed_lifetime.value (); } else { Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime in self param"); add_error (std::move (error)); // skip after somewhere? return Parse::Error::Self::make_parsing_error (); } } } // test for mut bool has_mut = false; if (lexer.peek_token ()->get_id () == MUT) { has_mut = true; lexer.skip_token (); } // skip self token const_TokenPtr self_tok = lexer.peek_token (); if (self_tok->get_id () != SELF) { // skip after somewhere? 
return Parse::Error::Self::make_not_self (); } lexer.skip_token (); // parse optional type std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // type is now required type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in self param"); add_error (std::move (error)); // skip after somewhere? return Parse::Error::Self::make_parsing_error (); } } // ensure that cannot have both type and reference if (type && has_reference) { Error error ( lexer.peek_token ()->get_locus (), "cannot have both a reference and a type specified in a self param"); add_error (std::move (error)); // skip after somewhere? return Parse::Error::Self::make_parsing_error (); } if (has_reference) { return std::make_unique (std::move (lifetime), has_mut, locus); } else { // note that type may be nullptr here and that's fine return std::make_unique (std::move (type), has_mut, locus); } } /* Parses an expression or macro statement. */ template std::unique_ptr Parser::parse_expr_stmt (AST::AttrVec outer_attrs, ParseRestrictions restrictions) { location_t locus = lexer.peek_token ()->get_locus (); tl::expected, Parse::Error::Expr> expr; switch (lexer.peek_token ()->get_id ()) { case IDENTIFIER: case CRATE: case SUPER: case SELF: case SELF_ALIAS: case DOLLAR_SIGN: case SCOPE_RESOLUTION: { AST::PathInExpression path = parse_path_in_expression (); tl::expected, Parse::Error::Expr> null_denotation; if (lexer.peek_token ()->get_id () == EXCLAM) { std::unique_ptr invoc = parse_macro_invocation_partial (std::move (path), std::move (outer_attrs)); if (restrictions.consume_semi && maybe_skip_token (SEMICOLON)) { invoc->add_semicolon (); // Macro invocation with semicolon. 
return invoc; } TokenId after_macro = lexer.peek_token ()->get_id (); if (restrictions.allow_close_after_expr_stmt && (after_macro == RIGHT_PAREN || after_macro == RIGHT_CURLY || after_macro == RIGHT_SQUARE)) return invoc; if (invoc->get_invoc_data ().get_delim_tok_tree ().get_delim_type () == AST::CURLY && after_macro != DOT && after_macro != QUESTION_MARK) { rust_debug ("braced macro statement"); return invoc; } null_denotation = std::move (invoc); } else { null_denotation = null_denotation_path (std::move (path), {}, restrictions); } expr = left_denotations (std::move (null_denotation), LBP_LOWEST, std::move (outer_attrs), restrictions); break; } default: restrictions.expr_can_be_stmt = true; expr = parse_expr (std::move (outer_attrs), restrictions); break; } if (!expr) { // expr is required, error Error error (lexer.peek_token ()->get_locus (), "failed to parse expr in expr statement"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } bool has_semi = false; if (restrictions.consume_semi) { if (maybe_skip_token (SEMICOLON)) { has_semi = true; } else if (expr.value ()->is_expr_without_block ()) { if (restrictions.allow_close_after_expr_stmt) { TokenId id = lexer.peek_token ()->get_id (); if (id != RIGHT_PAREN && id != RIGHT_CURLY && id != RIGHT_SQUARE) { expect_token (SEMICOLON); return nullptr; } } else { expect_token (SEMICOLON); return nullptr; } } } return std::make_unique (std::move (expr.value ()), locus, has_semi); } // Parses a loop label used in loop expressions. template tl::expected Parser::parse_loop_label (const_TokenPtr tok) { // parse lifetime - if doesn't exist, assume no label if (tok->get_id () != LIFETIME) { // not necessarily an error return Parse::Error::LoopLabel::make_not_loop_label (); } /* FIXME: check for named lifetime requirement here? or check in semantic * analysis phase? */ AST::Lifetime label = lifetime_from_token (tok); if (!skip_token (COLON)) { // skip somewhere? 
Parse::Error::LoopLabel::make_missing_colon (); } return tl::expected ( AST::LoopLabel (std::move (label), tok->get_locus ())); } // Parses the "pattern" part of the match arm (the 'case x:' equivalent). template AST::MatchArm Parser::parse_match_arm () { // parse optional outer attributes AST::AttrVec outer_attrs = parse_outer_attributes (); // DEBUG rust_debug ("about to start parsing match arm patterns"); // break early if find right curly if (lexer.peek_token ()->get_id () == RIGHT_CURLY) { // not an error return AST::MatchArm::create_error (); } // parse match arm patterns - at least 1 is required std::unique_ptr match_arm_pattern = parse_match_arm_pattern (RIGHT_CURLY); if (match_arm_pattern == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse any patterns in match arm"); add_error (std::move (error)); // skip somewhere? return AST::MatchArm::create_error (); } // DEBUG rust_debug ("successfully parsed match arm patterns"); // parse match arm guard expr if it exists std::unique_ptr guard_expr = nullptr; if (lexer.peek_token ()->get_id () == IF) { lexer.skip_token (); auto guard_expr_res = parse_expr (); if (!guard_expr_res) { Error error (lexer.peek_token ()->get_locus (), "failed to parse guard expression in match arm"); add_error (std::move (error)); // skip somewhere? return AST::MatchArm::create_error (); } guard_expr = std::move (guard_expr_res.value ()); } // DEBUG rust_debug ("successfully parsed match arm"); return AST::MatchArm (std::move (match_arm_pattern), lexer.peek_token ()->get_locus (), std::move (guard_expr), std::move (outer_attrs)); } /* Parses the patterns used in a match arm. End token id is the id of the * token that would exist after the patterns are done (e.g. '}' for match * expr, '=' for if let and while let). 
*/ template std::unique_ptr Parser::parse_match_arm_pattern (TokenId end_token_id) { // skip optional leading '|' if (lexer.peek_token ()->get_id () == PIPE) lexer.skip_token (); /* TODO: do I even need to store the result of this? can't be used. * If semantically different, I need a wrapped "match arm patterns" object * for this. */ std::unique_ptr pattern; // quick break out if end_token_id if (lexer.peek_token ()->get_id () == end_token_id) return pattern; // parse required pattern - if doesn't exist, return empty std::unique_ptr initial_pattern = parse_pattern (); if (initial_pattern == nullptr) { // FIXME: should this be an error? return pattern; } return initial_pattern; } // Parses a single parameter used in a closure definition. template AST::ClosureParam Parser::parse_closure_param () { AST::AttrVec outer_attrs = parse_outer_attributes (); // parse pattern (which is required) std::unique_ptr pattern = parse_pattern_no_alt (); if (pattern == nullptr) { // not necessarily an error return AST::ClosureParam::create_error (); } // parse optional type of param std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse type, which is now required type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in closure parameter"); add_error (std::move (error)); // skip somewhere? return AST::ClosureParam::create_error (); } } location_t loc = pattern->get_locus (); return AST::ClosureParam (std::move (pattern), loc, std::move (type), std::move (outer_attrs)); } // Parses a type (will further disambiguate any type). template std::unique_ptr Parser::parse_type (bool save_errors) { /* rules for all types: * NeverType: '!' * SliceType: '[' Type ']' * InferredType: '_' * MacroInvocation: SimplePath '!' DelimTokenTree * ParenthesisedType: '(' Type ')' * ImplTraitType: 'impl' TypeParamBounds * TypeParamBounds (not type) TypeParamBound ( '+' TypeParamBound )* '+'? 
* TypeParamBound Lifetime | TraitBound * ImplTraitTypeOneBound: 'impl' TraitBound * TraitObjectType: 'dyn'? TypeParamBounds * TraitObjectTypeOneBound: 'dyn'? TraitBound * TraitBound '?'? ForLifetimes? TypePath | '(' '?'? * ForLifetimes? TypePath ')' BareFunctionType: ForLifetimes? * FunctionQualifiers 'fn' etc. ForLifetimes (not type) 'for' '<' * LifetimeParams '>' FunctionQualifiers ( 'async' | 'const' )? * 'unsafe'? * ('extern' abi?)? QualifiedPathInType: '<' Type ( 'as' TypePath )? '>' * ( * '::' TypePathSegment )+ TypePath: '::'? TypePathSegment ( * '::' TypePathSegment)* ArrayType: '[' Type ';' Expr ']' * ReferenceType: '&' Lifetime? 'mut'? TypeNoBounds * RawPointerType: '*' ( 'mut' | 'const' ) TypeNoBounds * TupleType: '(' Type etc. - regular tuple stuff. Also * regular tuple vs parenthesised precedence * * Disambiguate between macro and type path via type path being parsed, and * then if '!' found, convert type path to simple path for macro. Usual * disambiguation for tuple vs parenthesised. For ImplTraitType and * TraitObjectType individual disambiguations, they seem more like "special * cases", so probably just try to parse the more general ImplTraitType or * TraitObjectType and return OneBound versions if they satisfy those * criteria. 
*/ const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case EXCLAM: // never type - can't be macro as no path beforehand lexer.skip_token (); return std::unique_ptr ( new AST::NeverType (t->get_locus ())); case LEFT_SQUARE: // slice type or array type - requires further disambiguation return parse_slice_or_array_type (); case LEFT_SHIFT: case LEFT_ANGLE: { // qualified path in type AST::QualifiedPathInType path = parse_qualified_path_in_type (); if (path.is_error ()) { if (save_errors) { Error error (t->get_locus (), "failed to parse qualified path in type"); add_error (std::move (error)); } return nullptr; } return std::unique_ptr ( new AST::QualifiedPathInType (std::move (path))); } case UNDERSCORE: // inferred type lexer.skip_token (); return std::unique_ptr ( new AST::InferredType (t->get_locus ())); case ASTERISK: // raw pointer type return parse_raw_pointer_type (); case AMP: // does this also include AMP_AMP? case LOGICAL_AND: // reference type return parse_reference_type (); case LIFETIME: { /* probably a lifetime bound, so probably type param bounds in * TraitObjectType */ std::vector> bounds = parse_type_param_bounds (); return std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), t->get_locus (), false)); } case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: case DOLLAR_SIGN: case SCOPE_RESOLUTION: { // macro invocation or type path - requires further disambiguation. /* for parsing path component of each rule, perhaps parse it as a * typepath and attempt conversion to simplepath if a trailing '!' is * found */ /* Type path also includes TraitObjectTypeOneBound BUT if it starts * with it, it is exactly the same as a TypePath syntactically, so * this is a syntactical ambiguity. As such, the parser will parse it * as a TypePath. This, however, does not prevent TraitObjectType from * starting with a typepath. 
*/ // parse path as type path AST::TypePath path = parse_type_path (); if (path.is_error ()) { if (save_errors) { Error error (t->get_locus (), "failed to parse path as first component of type"); add_error (std::move (error)); } return nullptr; } location_t locus = path.get_locus (); // branch on next token t = lexer.peek_token (); switch (t->get_id ()) { case EXCLAM: { // macro invocation // convert to simple path AST::SimplePath macro_path = path.as_simple_path (); if (macro_path.is_empty ()) { if (save_errors) { Error error (t->get_locus (), "failed to parse simple path in macro " "invocation (for type)"); add_error (std::move (error)); } return nullptr; } lexer.skip_token (); auto tok_tree = parse_delim_token_tree (); if (!tok_tree) return nullptr; return AST::MacroInvocation::Regular ( AST::MacroInvocData (std::move (macro_path), std::move (tok_tree.value ())), {}, locus); } case PLUS: { // type param bounds std::vector> bounds; // convert type path to trait bound std::unique_ptr path_bound ( new AST::TraitBound (std::move (path), locus, false, false)); bounds.push_back (std::move (path_bound)); /* parse rest of bounds - FIXME: better way to find when to stop * parsing */ while (t->get_id () == PLUS) { lexer.skip_token (); // parse bound if it exists - if not, assume end of sequence std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { break; } bounds.push_back (std::move (bound)); t = lexer.peek_token (); } return std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), locus, false)); } default: // assume that this is a type path and not an error return std::unique_ptr ( new AST::TypePath (std::move (path))); } } case LEFT_PAREN: /* tuple type or parenthesised type - requires further disambiguation * (the usual). 
ok apparently can be a parenthesised TraitBound too, so * could be TraitObjectTypeOneBound or TraitObjectType */ return parse_paren_prefixed_type (); case FOR: // TraitObjectTypeOneBound or BareFunctionType return parse_for_prefixed_type (); case ASYNC: case CONST: case UNSAFE: case EXTERN_KW: case FN_KW: // bare function type (with no for lifetimes) return parse_bare_function_type (std::vector ()); case IMPL: lexer.skip_token (); if (lexer.peek_token ()->get_id () == LIFETIME) { /* cannot be one bound because lifetime prevents it from being * traitbound */ std::vector> bounds = parse_type_param_bounds (); return std::unique_ptr ( new AST::ImplTraitType (std::move (bounds), t->get_locus ())); } else { // should be trait bound, so parse trait bound std::unique_ptr initial_bound = parse_trait_bound (); if (initial_bound == nullptr) { if (save_errors) { Error error (lexer.peek_token ()->get_locus (), "failed to parse ImplTraitType initial bound"); add_error (std::move (error)); } return nullptr; } location_t locus = t->get_locus (); // short cut if next token isn't '+' t = lexer.peek_token (); if (t->get_id () != PLUS) { return std::unique_ptr ( new AST::ImplTraitTypeOneBound (std::move (initial_bound), locus)); } // parse additional type param bounds std::vector> bounds; bounds.push_back (std::move (initial_bound)); while (t->get_id () == PLUS) { lexer.skip_token (); // parse bound if it exists std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { // not an error as trailing plus may exist break; } bounds.push_back (std::move (bound)); t = lexer.peek_token (); } return std::unique_ptr ( new AST::ImplTraitType (std::move (bounds), locus)); } case DYN: case QUESTION_MARK: { // either TraitObjectType or TraitObjectTypeOneBound bool has_dyn = false; if (t->get_id () == DYN) { lexer.skip_token (); has_dyn = true; } if (lexer.peek_token ()->get_id () == LIFETIME) { /* cannot be one bound because lifetime prevents it from being * traitbound */ 
std::vector> bounds = parse_type_param_bounds (); return std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), t->get_locus (), has_dyn)); } else { // should be trait bound, so parse trait bound std::unique_ptr initial_bound = parse_trait_bound (); if (initial_bound == nullptr) { if (save_errors) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse TraitObjectType initial bound"); add_error (std::move (error)); } return nullptr; } // short cut if next token isn't '+' t = lexer.peek_token (); if (t->get_id () != PLUS) { // convert trait bound to value object AST::TraitBound value_bound (*initial_bound); // DEBUG: removed as unique ptr, so should auto delete // delete initial_bound; return std::unique_ptr ( new AST::TraitObjectTypeOneBound (std::move (value_bound), t->get_locus (), has_dyn)); } // parse additional type param bounds std::vector> bounds; bounds.push_back (std::move (initial_bound)); while (t->get_id () == PLUS) { lexer.skip_token (); // parse bound if it exists std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { // not an error as trailing plus may exist break; } bounds.push_back (std::move (bound)); t = lexer.peek_token (); } return std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), t->get_locus (), has_dyn)); } } default: if (save_errors) add_error (Error (t->get_locus (), "unrecognised token %qs in type", t->get_token_description ())); return nullptr; } } /* Parses a type that has '(' as its first character. Returns a tuple type, * parenthesised type, TraitObjectTypeOneBound, or TraitObjectType depending * on following characters. */ template std::unique_ptr Parser::parse_paren_prefixed_type () { /* NOTE: Syntactical ambiguity of a parenthesised trait bound is considered * a trait bound, not a parenthesised type, so that it can still be used in * type param bounds. */ /* NOTE: this implementation is really shit but I couldn't think of a better * one. 
It requires essentially breaking polymorphism and downcasting via * virtual method abuse, as it was copied from the rustc implementation (in * which types are reified due to tagged union), after a more OOP attempt by * me failed. */ location_t left_delim_locus = lexer.peek_token ()->get_locus (); // skip left delim lexer.skip_token (); /* while next token isn't close delim, parse comma-separated types, saving * whether trailing comma happens */ const_TokenPtr t = lexer.peek_token (); bool trailing_comma = true; std::vector> types; while (t->get_id () != RIGHT_PAREN) { std::unique_ptr type = parse_type (); if (type == nullptr) { Error error (t->get_locus (), "failed to parse type inside parentheses (probably " "tuple or parenthesised)"); add_error (std::move (error)); return nullptr; } types.push_back (std::move (type)); t = lexer.peek_token (); if (t->get_id () != COMMA) { trailing_comma = false; break; } lexer.skip_token (); t = lexer.peek_token (); } if (!skip_token (RIGHT_PAREN)) { return nullptr; } // if only one type and no trailing comma, then not a tuple type if (types.size () == 1 && !trailing_comma) { // must be a TraitObjectType (with more than one bound) if (lexer.peek_token ()->get_id () == PLUS) { // create type param bounds vector std::vector> bounds; // HACK: convert type to traitbound and add to bounds std::unique_ptr released_ptr = std::move (types[0]); std::unique_ptr converted_bound ( released_ptr->to_trait_bound (true)); if (converted_bound == nullptr) { Error error ( lexer.peek_token ()->get_locus (), "failed to hackily converted parsed type to trait bound"); add_error (std::move (error)); return nullptr; } bounds.push_back (std::move (converted_bound)); t = lexer.peek_token (); while (t->get_id () == PLUS) { lexer.skip_token (); // attempt to parse typeparambound std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { // not an error if null break; } bounds.push_back (std::move (bound)); t = lexer.peek_token (); } return 
std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), left_delim_locus, false)); } else { // release vector pointer std::unique_ptr released_ptr = std::move (types[0]); /* HACK: attempt to convert to trait bound. if fails, parenthesised * type */ std::unique_ptr converted_bound ( released_ptr->to_trait_bound (true)); if (converted_bound == nullptr) { // parenthesised type return std::unique_ptr ( new AST::ParenthesisedType (std::move (released_ptr), left_delim_locus)); } else { // trait object type (one bound) // get value semantics trait bound AST::TraitBound value_bound (*converted_bound); return std::unique_ptr ( new AST::TraitObjectTypeOneBound (value_bound, left_delim_locus)); } } } else { return std::unique_ptr ( new AST::TupleType (std::move (types), left_delim_locus)); } /* TODO: ensure that this ensures that dynamic dispatch for traits is not * lost somehow */ } /* Parses a type that has 'for' as its first character. This means it has a * "for lifetimes", so returns either a BareFunctionType, TraitObjectType, or * TraitObjectTypeOneBound depending on following characters. 
*/ template std::unique_ptr Parser::parse_for_prefixed_type () { location_t for_locus = lexer.peek_token ()->get_locus (); // parse for lifetimes in type std::vector for_lifetimes = parse_for_lifetimes (); // branch on next token - either function or a trait type const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case ASYNC: case CONST: case UNSAFE: case EXTERN_KW: case FN_KW: return parse_bare_function_type (std::move (for_lifetimes)); case SCOPE_RESOLUTION: case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: case DOLLAR_SIGN: { // path, so trait type // parse type path to finish parsing trait bound AST::TypePath path = parse_type_path (); t = lexer.peek_token (); if (t->get_id () != PLUS) { // must be one-bound trait type // create trait bound value object AST::TraitBound bound (std::move (path), for_locus, false, false, std::move (for_lifetimes)); return std::unique_ptr ( new AST::TraitObjectTypeOneBound (std::move (bound), for_locus)); } /* more than one bound trait type (or at least parsed as it - could be * trailing '+') create trait bound pointer and bounds */ std::unique_ptr initial_bound ( new AST::TraitBound (std::move (path), for_locus, false, false, std::move (for_lifetimes))); std::vector> bounds; bounds.push_back (std::move (initial_bound)); while (t->get_id () == PLUS) { lexer.skip_token (); // parse type param bound if it exists std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { // not an error - e.g. trailing plus return nullptr; } bounds.push_back (std::move (bound)); t = lexer.peek_token (); } return std::unique_ptr ( new AST::TraitObjectType (std::move (bounds), for_locus, false)); } default: // error add_error (Error (t->get_locus (), "unrecognised token %qs in bare function type or trait " "object type or trait object type one bound", t->get_token_description ())); return nullptr; } } // Parses a maybe named param used in bare function types. 
template AST::MaybeNamedParam Parser::parse_maybe_named_param (AST::AttrVec outer_attrs) { /* Basically guess that param is named if first token is identifier or * underscore and second token is semicolon. This should probably have no * exceptions. rustc uses backtracking to parse these, but at the time of * writing gccrs has no backtracking capabilities. */ const_TokenPtr current = lexer.peek_token (); const_TokenPtr next = lexer.peek_token (1); Identifier name; AST::MaybeNamedParam::ParamKind kind = AST::MaybeNamedParam::UNNAMED; if (current->get_id () == IDENTIFIER && next->get_id () == COLON) { // named param name = {current}; kind = AST::MaybeNamedParam::IDENTIFIER; lexer.skip_token (1); } else if (current->get_id () == UNDERSCORE && next->get_id () == COLON) { // wildcard param name = {Values::Keywords::UNDERSCORE, current->get_locus ()}; kind = AST::MaybeNamedParam::WILDCARD; lexer.skip_token (1); } // parse type (required) std::unique_ptr type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in maybe named param"); add_error (std::move (error)); return AST::MaybeNamedParam::create_error (); } return AST::MaybeNamedParam (std::move (name), kind, std::move (type), std::move (outer_attrs), current->get_locus ()); } /* Parses a bare function type (with the given for lifetimes for convenience - * does not parse them itself). 
*/ template std::unique_ptr Parser::parse_bare_function_type ( std::vector for_lifetimes) { // TODO: pass in for lifetime location as param location_t best_try_locus = lexer.peek_token ()->get_locus (); auto qualifiers = parse_function_qualifiers (); if (!qualifiers) return nullptr; if (!skip_token (FN_KW)) return nullptr; if (!skip_token (LEFT_PAREN)) return nullptr; // parse function params, if they exist std::vector params; bool is_variadic = false; AST::AttrVec variadic_attrs; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_PAREN) { AST::AttrVec temp_attrs = parse_outer_attributes (); if (lexer.peek_token ()->get_id () == ELLIPSIS) { lexer.skip_token (); is_variadic = true; variadic_attrs = std::move (temp_attrs); t = lexer.peek_token (); if (t->get_id () != RIGHT_PAREN) { Error error (t->get_locus (), "expected right parentheses after variadic in maybe " "named function " "parameters, found %qs", t->get_token_description ()); add_error (std::move (error)); return nullptr; } break; } AST::MaybeNamedParam param = parse_maybe_named_param (std::move (temp_attrs)); if (param.is_error ()) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse maybe named param in bare function type"); add_error (std::move (error)); return nullptr; } params.push_back (std::move (param)); if (lexer.peek_token ()->get_id () != COMMA) break; lexer.skip_token (); t = lexer.peek_token (); } if (!skip_token (RIGHT_PAREN)) return nullptr; // bare function return type, if exists std::unique_ptr return_type = nullptr; if (lexer.peek_token ()->get_id () == RETURN_TYPE) { lexer.skip_token (); // parse required TypeNoBounds return_type = parse_type_no_bounds (); if (return_type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse return type (type no bounds) in bare " "function type"); add_error (std::move (error)); return nullptr; } } return std::unique_ptr (new AST::BareFunctionType ( std::move (for_lifetimes), std::move 
(qualifiers.value ()), std::move (params), is_variadic, std::move (variadic_attrs), std::move (return_type), best_try_locus)); } template std::unique_ptr Parser::parse_reference_type_inner (location_t locus) { // parse optional lifetime AST::Lifetime lifetime = AST::Lifetime::elided (); if (lexer.peek_token ()->get_id () == LIFETIME) { auto parsed_lifetime = parse_lifetime (true); if (parsed_lifetime) { lifetime = parsed_lifetime.value (); } else { Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime in reference type"); add_error (std::move (error)); return nullptr; } } bool is_mut = false; if (lexer.peek_token ()->get_id () == MUT) { lexer.skip_token (); is_mut = true; } // parse type no bounds, which is required std::unique_ptr type = parse_type_no_bounds (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse referenced type in reference type"); add_error (std::move (error)); return nullptr; } return std::unique_ptr ( new AST::ReferenceType (is_mut, std::move (type), locus, std::move (lifetime))); } // Parses a reference type (mutable or immutable, with given lifetime). template std::unique_ptr Parser::parse_reference_type () { auto t = lexer.peek_token (); auto locus = t->get_locus (); switch (t->get_id ()) { case AMP: skip_token (AMP); return parse_reference_type_inner (locus); case LOGICAL_AND: skip_token (LOGICAL_AND); return std::unique_ptr ( new AST::ReferenceType (false, parse_reference_type_inner (locus), locus)); default: rust_unreachable (); } } // Parses a raw (unsafe) pointer type. 
/* Expects the current token to be '*'. The token after it must be `mut` or
 * `const`, followed by the pointed-to TypeNoBounds. Returns nullptr after
 * reporting an error if the qualifier is missing or the pointee fails to
 * parse. */
template std::unique_ptr
Parser::parse_raw_pointer_type ()
{
  location_t start_locus = lexer.peek_token ()->get_locus ();
  skip_token (ASTERISK);

  // the pointer kind is decided by the qualifier directly after '*'
  const_TokenPtr qualifier_tok = lexer.peek_token ();
  const auto qualifier_id = qualifier_tok->get_id ();

  if (qualifier_id != MUT && qualifier_id != CONST)
    {
      add_error (Error (qualifier_tok->get_locus (),
                        "unrecognised token %qs in raw pointer type",
                        qualifier_tok->get_token_description ()));

      return nullptr;
    }

  AST::RawPointerType::PointerType ptr_kind
    = (qualifier_id == MUT) ? AST::RawPointerType::MUT
                            : AST::RawPointerType::CONST;
  lexer.skip_token ();

  // the pointee type is mandatory
  std::unique_ptr pointee = parse_type_no_bounds ();
  if (!pointee)
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "failed to parse pointed type of raw pointer type");
      add_error (std::move (error));

      return nullptr;
    }

  return std::unique_ptr (
    new AST::RawPointerType (ptr_kind, std::move (pointee), start_locus));
}

/* Parses a slice or array type, depending on following arguments (as
 * lookahead is not possible).
 *
 * After '[' and the element type, a ']' yields a slice type, while a ';'
 * followed by a size expression and ']' yields an array type. Returns
 * nullptr after reporting an error on any other shape. */
template std::unique_ptr
Parser::parse_slice_or_array_type ()
{
  location_t open_locus = lexer.peek_token ()->get_locus ();
  skip_token (LEFT_SQUARE);

  // the element type is mandatory
  std::unique_ptr element_type = parse_type ();
  if (!element_type)
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "failed to parse inner type in slice or array type");
      add_error (std::move (error));

      return nullptr;
    }

  // what follows the element type decides between slice and array
  const_TokenPtr next_tok = lexer.peek_token ();
  const auto next_id = next_tok->get_id ();

  if (next_id == RIGHT_SQUARE)
    {
      // slice type
      lexer.skip_token ();

      return std::unique_ptr (
        new AST::SliceType (std::move (element_type), open_locus));
    }

  if (next_id != SEMICOLON)
    {
      // neither ']' nor ';'
      add_error (
        Error (next_tok->get_locus (),
               "unrecognised token %qs in slice or array type after inner type",
               next_tok->get_token_description ()));

      return nullptr;
    }

  // array type
  lexer.skip_token ();

  // the size expression is mandatory
  auto length_expr = parse_anon_const ();
  if (!length_expr)
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "failed to parse size expression in array type");
      add_error (std::move (error));

      return nullptr;
    }

  if (!skip_token (RIGHT_SQUARE))
    return nullptr;

  return std::unique_ptr (
    new AST::ArrayType (std::move (element_type), std::move (*length_expr),
                        open_locus));
}

// Parses a type, taking into account type boundary disambiguation.
template std::unique_ptr Parser::parse_type_no_bounds () { const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case EXCLAM: // never type - can't be macro as no path beforehand lexer.skip_token (); return std::unique_ptr ( new AST::NeverType (t->get_locus ())); case LEFT_SQUARE: // slice type or array type - requires further disambiguation return parse_slice_or_array_type (); case LEFT_SHIFT: case LEFT_ANGLE: { // qualified path in type AST::QualifiedPathInType path = parse_qualified_path_in_type (); if (path.is_error ()) { Error error (t->get_locus (), "failed to parse qualified path in type"); add_error (std::move (error)); return nullptr; } return std::unique_ptr ( new AST::QualifiedPathInType (std::move (path))); } case UNDERSCORE: // inferred type lexer.skip_token (); return std::unique_ptr ( new AST::InferredType (t->get_locus ())); case ASTERISK: // raw pointer type return parse_raw_pointer_type (); case AMP: // does this also include AMP_AMP? Yes! Which is... LOGICAL_AND? case LOGICAL_AND: // reference type return parse_reference_type (); case LIFETIME: /* probably a lifetime bound, so probably type param bounds in * TraitObjectType. this is not allowed, but detection here for error * message */ add_error (Error (t->get_locus (), "lifetime bounds (i.e. in type param bounds, in " "TraitObjectType) are not allowed as TypeNoBounds")); return nullptr; case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: case DOLLAR_SIGN: case SCOPE_RESOLUTION: { // macro invocation or type path - requires further disambiguation. /* for parsing path component of each rule, perhaps parse it as a * typepath and attempt conversion to simplepath if a trailing '!' is * found */ /* Type path also includes TraitObjectTypeOneBound BUT if it starts * with it, it is exactly the same as a TypePath syntactically, so * this is a syntactical ambiguity. As such, the parser will parse it * as a TypePath. 
This, however, does not prevent TraitObjectType from * starting with a typepath. */ // parse path as type path AST::TypePath path = parse_type_path (); if (path.is_error ()) { Error error ( t->get_locus (), "failed to parse path as first component of type no bounds"); add_error (std::move (error)); return nullptr; } location_t locus = path.get_locus (); // branch on next token t = lexer.peek_token (); switch (t->get_id ()) { case EXCLAM: { // macro invocation // convert to simple path AST::SimplePath macro_path = path.as_simple_path (); if (macro_path.is_empty ()) { Error error (t->get_locus (), "failed to parse simple path in macro " "invocation (for type)"); add_error (std::move (error)); return nullptr; } lexer.skip_token (); auto tok_tree = parse_delim_token_tree (); if (!tok_tree) return nullptr; return AST::MacroInvocation::Regular ( AST::MacroInvocData (std::move (macro_path), std::move (tok_tree.value ())), {}, locus); } default: // assume that this is a type path and not an error return std::unique_ptr ( new AST::TypePath (std::move (path))); } } case LEFT_PAREN: /* tuple type or parenthesised type - requires further disambiguation * (the usual). 
ok apparently can be a parenthesised TraitBound too, so * could be TraitObjectTypeOneBound */ return parse_paren_prefixed_type_no_bounds (); case FOR: case ASYNC: case CONST: case UNSAFE: case EXTERN_KW: case FN_KW: // bare function type (with no for lifetimes) return parse_bare_function_type (std::vector ()); case IMPL: lexer.skip_token (); if (lexer.peek_token ()->get_id () == LIFETIME) { /* cannot be one bound because lifetime prevents it from being * traitbound not allowed as type no bounds, only here for error * message */ Error error ( lexer.peek_token ()->get_locus (), "lifetime (probably lifetime bound, in type param " "bounds, in ImplTraitType) is not allowed in TypeNoBounds"); add_error (std::move (error)); return nullptr; } else { // should be trait bound, so parse trait bound std::unique_ptr initial_bound = parse_trait_bound (); if (initial_bound == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse ImplTraitTypeOneBound bound"); add_error (std::move (error)); return nullptr; } location_t locus = t->get_locus (); // ensure not a trait with multiple bounds t = lexer.peek_token (); if (t->get_id () == PLUS) { Error error (t->get_locus (), "plus after trait bound means an ImplTraitType, " "which is not allowed as a TypeNoBounds"); add_error (std::move (error)); return nullptr; } return std::unique_ptr ( new AST::ImplTraitTypeOneBound (std::move (initial_bound), locus)); } case DYN: case QUESTION_MARK: { // either TraitObjectTypeOneBound bool has_dyn = false; if (t->get_id () == DYN) { lexer.skip_token (); has_dyn = true; } if (lexer.peek_token ()->get_id () == LIFETIME) { /* means that cannot be TraitObjectTypeOneBound - so here for * error message */ Error error (lexer.peek_token ()->get_locus (), "lifetime as bound in TraitObjectTypeOneBound " "is not allowed, so cannot be TypeNoBounds"); add_error (std::move (error)); return nullptr; } // should be trait bound, so parse trait bound std::unique_ptr initial_bound = 
parse_trait_bound (); if (initial_bound == nullptr) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse TraitObjectTypeOneBound initial bound"); add_error (std::move (error)); return nullptr; } location_t locus = t->get_locus (); // detect error with plus as next token t = lexer.peek_token (); if (t->get_id () == PLUS) { Error error (t->get_locus (), "plus after trait bound means a TraitObjectType, " "which is not allowed as a TypeNoBounds"); add_error (std::move (error)); return nullptr; } // convert trait bound to value object AST::TraitBound value_bound (*initial_bound); return std::unique_ptr ( new AST::TraitObjectTypeOneBound (std::move (value_bound), locus, has_dyn)); } default: add_error (Error (t->get_locus (), "unrecognised token %qs in type no bounds", t->get_token_description ())); return nullptr; } } // Parses a type no bounds beginning with '('. template std::unique_ptr Parser::parse_paren_prefixed_type_no_bounds () { /* NOTE: this could probably be parsed without the HACK solution of * parse_paren_prefixed_type, but I was lazy. So FIXME for future.*/ /* NOTE: again, syntactical ambiguity of a parenthesised trait bound is * considered a trait bound, not a parenthesised type, so that it can still * be used in type param bounds. 
*/ location_t left_paren_locus = lexer.peek_token ()->get_locus (); // skip left delim lexer.skip_token (); /* while next token isn't close delim, parse comma-separated types, saving * whether trailing comma happens */ const_TokenPtr t = lexer.peek_token (); bool trailing_comma = true; std::vector> types; while (t->get_id () != RIGHT_PAREN) { std::unique_ptr type = parse_type (); if (type == nullptr) { Error error (t->get_locus (), "failed to parse type inside parentheses (probably " "tuple or parenthesised)"); add_error (std::move (error)); return nullptr; } types.push_back (std::move (type)); t = lexer.peek_token (); if (t->get_id () != COMMA) { trailing_comma = false; break; } lexer.skip_token (); t = lexer.peek_token (); } if (!skip_token (RIGHT_PAREN)) { return nullptr; } // if only one type and no trailing comma, then not a tuple type if (types.size () == 1 && !trailing_comma) { // must be a TraitObjectType (with more than one bound) if (lexer.peek_token ()->get_id () == PLUS) { // error - this is not allowed for type no bounds Error error (lexer.peek_token ()->get_locus (), "plus (implying TraitObjectType as type param " "bounds) is not allowed in type no bounds"); add_error (std::move (error)); return nullptr; } else { // release vector pointer std::unique_ptr released_ptr = std::move (types[0]); /* HACK: attempt to convert to trait bound. 
if fails, parenthesised * type */ std::unique_ptr converted_bound ( released_ptr->to_trait_bound (true)); if (converted_bound == nullptr) { // parenthesised type return std::unique_ptr ( new AST::ParenthesisedType (std::move (released_ptr), left_paren_locus)); } else { // trait object type (one bound) // get value semantics trait bound AST::TraitBound value_bound (*converted_bound); return std::unique_ptr ( new AST::TraitObjectTypeOneBound (value_bound, left_paren_locus)); } } } else { return std::unique_ptr ( new AST::TupleType (std::move (types), left_paren_locus)); } /* TODO: ensure that this ensures that dynamic dispatch for traits is not * lost somehow */ } // Parses tuple struct items if they exist. Does not parse parentheses. template std::unique_ptr Parser::parse_tuple_struct_items () { std::vector> lower_patterns; // DEBUG rust_debug ("started parsing tuple struct items"); // check for '..' at front if (lexer.peek_token ()->get_id () == DOT_DOT) { // only parse upper patterns lexer.skip_token (); // DEBUG rust_debug ("'..' 
at front in tuple struct items detected"); std::vector> upper_patterns; const_TokenPtr t = lexer.peek_token (); while (t->get_id () == COMMA) { lexer.skip_token (); // break if right paren if (lexer.peek_token ()->get_id () == RIGHT_PAREN) break; // parse pattern, which is now required std::unique_ptr pattern = parse_pattern (); if (pattern == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse pattern in tuple struct items"); add_error (std::move (error)); return nullptr; } upper_patterns.push_back (std::move (pattern)); t = lexer.peek_token (); } // DEBUG rust_debug ( "finished parsing tuple struct items ranged (upper/none only)"); return std::unique_ptr ( new AST::TupleStructItemsHasRest (std::move (lower_patterns), std::move (upper_patterns))); } // has at least some lower patterns const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) { // DEBUG rust_debug ("about to parse pattern in tuple struct items"); // parse pattern, which is required std::unique_ptr pattern = parse_pattern (); if (pattern == nullptr) { Error error (t->get_locus (), "failed to parse pattern in tuple struct items"); add_error (std::move (error)); return nullptr; } lower_patterns.push_back (std::move (pattern)); // DEBUG rust_debug ("successfully parsed pattern in tuple struct items"); if (lexer.peek_token ()->get_id () != COMMA) { // DEBUG rust_debug ("broke out of parsing patterns in tuple struct " "items as no comma"); break; } lexer.skip_token (); t = lexer.peek_token (); } // branch on next token t = lexer.peek_token (); switch (t->get_id ()) { case RIGHT_PAREN: return std::unique_ptr ( new AST::TupleStructItemsNoRest (std::move (lower_patterns))); case DOT_DOT: { // has an upper range that must be parsed separately lexer.skip_token (); std::vector> upper_patterns; t = lexer.peek_token (); while (t->get_id () == COMMA) { lexer.skip_token (); // break if next token is right paren if (lexer.peek_token ()->get_id () 
== RIGHT_PAREN) break; // parse pattern, which is required std::unique_ptr pattern = parse_pattern (); if (pattern == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse pattern in tuple struct items"); add_error (std::move (error)); return nullptr; } upper_patterns.push_back (std::move (pattern)); t = lexer.peek_token (); } return std::unique_ptr ( new AST::TupleStructItemsHasRest (std::move (lower_patterns), std::move (upper_patterns))); } default: // error add_error (Error (t->get_locus (), "unexpected token %qs in tuple struct items", t->get_token_description ())); return nullptr; } } /* Parses a statement or expression (depending on whether a trailing semicolon * exists). Useful for block expressions where it cannot be determined through * lookahead whether it is a statement or expression to be parsed. */ template tl::expected Parser::parse_stmt_or_expr () { // quick exit for empty statement const_TokenPtr t = lexer.peek_token (); if (t->get_id () == SEMICOLON) { lexer.skip_token (); std::unique_ptr stmt ( new AST::EmptyStmt (t->get_locus ())); return ExprOrStmt (std::move (stmt)); } // parse outer attributes AST::AttrVec outer_attrs = parse_outer_attributes (); ParseRestrictions restrictions; restrictions.expr_can_be_stmt = true; // Defered child error checking: we need to check for a semicolon tl::expected, Parse::Error::Expr> expr; // parsing this will be annoying because of the many different possibilities /* best may be just to copy paste in parse_item switch, and failing that try * to parse outer attributes, and then pass them in to either a let * statement or (fallback) expression statement. */ // FIXME: think of a way to do this without such a large switch? /* FIXME: for expressions at least, the only way that they can really be * parsed properly in this way is if they don't support operators on them. * They must be pratt-parsed otherwise. As such due to composability, only * explicit statements will have special cases here. 
This should roughly * correspond to "expr-with-block", but this warning is here in case it * isn't the case. */ t = lexer.peek_token (); switch (t->get_id ()) { case LET: { // let statement std::unique_ptr stmt ( parse_let_stmt (std::move (outer_attrs))); return ExprOrStmt (std::move (stmt)); } case PUB: case MOD: case EXTERN_KW: case USE: case FN_KW: case TYPE: case STRUCT_KW: case ENUM_KW: case CONST: case STATIC_KW: case AUTO: case TRAIT: case IMPL: { std::unique_ptr item ( parse_vis_item (std::move (outer_attrs))); return ExprOrStmt (std::move (item)); } /* TODO: implement union keyword but not really because of * context-dependence crappy hack way to parse a union written below to * separate it from the good code. */ // case UNION: case UNSAFE: { // maybe - unsafe traits are a thing /* if any of these (should be all possible VisItem prefixes), parse a * VisItem - can't parse item because would require reparsing outer * attributes */ const_TokenPtr t2 = lexer.peek_token (1); switch (t2->get_id ()) { case LEFT_CURLY: { // unsafe block: parse as expression expr = parse_expr (std::move (outer_attrs), restrictions); break; } case AUTO: case TRAIT: { // unsafe trait std::unique_ptr item ( parse_vis_item (std::move (outer_attrs))); return ExprOrStmt (std::move (item)); } case EXTERN_KW: case FN_KW: { // unsafe function std::unique_ptr item ( parse_vis_item (std::move (outer_attrs))); return ExprOrStmt (std::move (item)); } case IMPL: { // unsafe trait impl std::unique_ptr item ( parse_vis_item (std::move (outer_attrs))); return ExprOrStmt (std::move (item)); } default: add_error (Error (t2->get_locus (), "unrecognised token %qs after parsing unsafe - " "expected beginning of expression or statement", t->get_token_description ())); // skip somewhere? return tl::unexpected ( Parse::Error::Node::MALFORMED); } break; } /* FIXME: this is either a macro invocation or macro invocation semi. * start parsing to determine which one it is. 
*/ // FIXME: old code there // crappy hack to do union "keyword" case IDENTIFIER: if (t->get_str () == Values::WeakKeywords::UNION && lexer.peek_token (1)->get_id () == IDENTIFIER) { std::unique_ptr item ( parse_vis_item (std::move (outer_attrs))); return ExprOrStmt (std::move (item)); // or should this go straight to parsing union? } else if (t->get_str () == Values::WeakKeywords::MACRO_RULES && lexer.peek_token (1)->get_id () == EXCLAM) { // macro_rules! macro item std::unique_ptr item ( parse_macro_rules_def (std::move (outer_attrs))); return ExprOrStmt (std::move (item)); } gcc_fallthrough (); case SUPER: case SELF: case SELF_ALIAS: case CRATE: case SCOPE_RESOLUTION: case DOLLAR_SIGN: { AST::PathInExpression path = parse_path_in_expression (); tl::expected, Parse::Error::Expr> null_denotation; if (lexer.peek_token ()->get_id () == EXCLAM) { std::unique_ptr invoc = parse_macro_invocation_partial (std::move (path), std::move (outer_attrs)); if (invoc == nullptr) return tl::unexpected ( Parse::Error::Node::CHILD_ERROR); if (restrictions.consume_semi && maybe_skip_token (SEMICOLON)) { invoc->add_semicolon (); // Macro invocation with semicolon. 
return ExprOrStmt ( std::unique_ptr (std::move (invoc))); } TokenId after_macro = lexer.peek_token ()->get_id (); AST::DelimType delim_type = invoc->get_invoc_data () .get_delim_tok_tree () .get_delim_type (); if (delim_type == AST::CURLY && after_macro != DOT && after_macro != QUESTION_MARK) { rust_debug ("braced macro statement"); return ExprOrStmt ( std::unique_ptr (std::move (invoc))); } null_denotation = std::move (invoc); } else { null_denotation = null_denotation_path (std::move (path), {}, restrictions); } expr = left_denotations (std::move (null_denotation), LBP_LOWEST, std::move (outer_attrs), restrictions); break; } default: /* expression statement or expression itself - parse * expression then make it statement if semi afterwards */ expr = parse_expr (std::move (outer_attrs), restrictions); break; } const_TokenPtr after_expr = lexer.peek_token (); if (after_expr->get_id () == SEMICOLON) { // must be expression statement lexer.skip_token (); if (expr) { return ExprOrStmt ( std::make_unique (std::move (expr.value ()), t->get_locus (), true)); } else { return tl::unexpected ( Parse::Error::Node::CHILD_ERROR); } } if (expr) { // block expression statement. 
if (!expr.value ()->is_expr_without_block () && after_expr->get_id () != RIGHT_CURLY) return ExprOrStmt ( std::make_unique (std::move (expr.value ()), t->get_locus (), false)); // Check if expr_without_block is properly terminated if (expr.value ()->is_expr_without_block () && after_expr->get_id () != RIGHT_CURLY) { // expr_without_block must be followed by ';' or '}' Error error (after_expr->get_locus (), "expected %<;%> or %<}%> after expression, found %qs", after_expr->get_token_description ()); add_error (std::move (error)); return tl::unexpected ( Parse::Error::Node::MALFORMED); } } // return expression if (expr) return ExprOrStmt (std::move (expr.value ())); else return tl::unexpected (Parse::Error::Node::CHILD_ERROR); } } // namespace Rust #include "rust-parse-impl-utils.hxx" #include "rust-parse-impl-attribute.hxx" #include "rust-parse-impl-ttree.hxx" #include "rust-parse-impl-macro.hxx" #include "rust-parse-impl-path.hxx" #include "rust-parse-impl-pattern.hxx" #include "rust-parse-impl-expr.hxx"