// Copyright (C) 2020-2022 Free Software Foundation, Inc.

// This file is part of GCC.

// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.

// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.

// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before
 * Parser was template). Separated from rust-parse.h for readability. */

/* DO NOT INCLUDE ANYWHERE - this is automatically included with rust-parse.h
 * This is also the reason why there are no include guards. */

#define INCLUDE_ALGORITHM
#include "rust-diagnostics.h"
#include "rust-make-unique.h"

namespace Rust {
// Left binding powers of operations.
enum binding_powers { // Highest priority LBP_HIGHEST = 100, LBP_PATH = 95, LBP_METHOD_CALL = 90, LBP_FIELD_EXPR = 85, LBP_FUNCTION_CALL = 80, LBP_ARRAY_REF = LBP_FUNCTION_CALL, LBP_QUESTION_MARK = 75, // unary postfix - counts as left LBP_UNARY_PLUS = 70, // Used only when the null denotation is + LBP_UNARY_MINUS = LBP_UNARY_PLUS, // Used only when the null denotation is - LBP_UNARY_ASTERISK = LBP_UNARY_PLUS, // deref operator - unary prefix LBP_UNARY_EXCLAM = LBP_UNARY_PLUS, LBP_UNARY_AMP = LBP_UNARY_PLUS, LBP_UNARY_AMP_MUT = LBP_UNARY_PLUS, LBP_AS = 65, LBP_MUL = 60, LBP_DIV = LBP_MUL, LBP_MOD = LBP_MUL, LBP_PLUS = 55, LBP_MINUS = LBP_PLUS, LBP_L_SHIFT = 50, LBP_R_SHIFT = LBP_L_SHIFT, LBP_AMP = 45, LBP_CARET = 40, LBP_PIPE = 35, LBP_EQUAL = 30, LBP_NOT_EQUAL = LBP_EQUAL, LBP_SMALLER_THAN = LBP_EQUAL, LBP_SMALLER_EQUAL = LBP_EQUAL, LBP_GREATER_THAN = LBP_EQUAL, LBP_GREATER_EQUAL = LBP_EQUAL, LBP_LOGICAL_AND = 25, LBP_LOGICAL_OR = 20, LBP_DOT_DOT = 15, LBP_DOT_DOT_EQ = LBP_DOT_DOT, // TODO: note all these assig operators are RIGHT associative! LBP_ASSIG = 10, LBP_PLUS_ASSIG = LBP_ASSIG, LBP_MINUS_ASSIG = LBP_ASSIG, LBP_MULT_ASSIG = LBP_ASSIG, LBP_DIV_ASSIG = LBP_ASSIG, LBP_MOD_ASSIG = LBP_ASSIG, LBP_AMP_ASSIG = LBP_ASSIG, LBP_PIPE_ASSIG = LBP_ASSIG, LBP_CARET_ASSIG = LBP_ASSIG, LBP_L_SHIFT_ASSIG = LBP_ASSIG, LBP_R_SHIFT_ASSIG = LBP_ASSIG, // return, break, and closures as lowest priority? 
LBP_RETURN = 5, LBP_BREAK = LBP_RETURN, LBP_CLOSURE = LBP_RETURN, // unary prefix operators #if 0 // rust precedences // used for closures PREC_CLOSURE = -40, // used for break, continue, return, and yield PREC_JUMP = -30, // used for range (although weird comment in rustc about this) PREC_RANGE = -10, // used for binary operators mentioned below - also cast, colon (type), // assign, assign_op PREC_BINOP = FROM_ASSOC_OP, // used for box, address_of, let, unary (again, weird comment on let) PREC_PREFIX = 50, // used for await, call, method call, field, index, try, // inline asm, macro invocation PREC_POSTFIX = 60, // used for array, repeat, tuple, literal, path, paren, if, // while, for, 'loop', match, block, try block, async, struct PREC_PAREN = 99, PREC_FORCE_PAREN = 100, #endif // lowest priority LBP_LOWEST = 0 }; /* Returns whether the token can start a type (i.e. there is a valid type * beginning with the token). */ inline bool can_tok_start_type (TokenId id) { switch (id) { case EXCLAM: case LEFT_SQUARE: case LEFT_ANGLE: case UNDERSCORE: case ASTERISK: case AMP: case LIFETIME: case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: case DOLLAR_SIGN: case SCOPE_RESOLUTION: case LEFT_PAREN: case FOR: case ASYNC: case CONST: case UNSAFE: case EXTERN_TOK: case FN_TOK: case IMPL: case DYN: case QUESTION_MARK: return true; default: return false; } } /* Returns whether the token id is (or is likely to be) a right angle bracket. * i.e. '>', '>>', '>=' and '>>=' tokens. */ inline bool is_right_angle_tok (TokenId id) { switch (id) { case RIGHT_ANGLE: case RIGHT_SHIFT: case GREATER_OR_EQUAL: case RIGHT_SHIFT_EQ: return true; default: return false; } } /* HACK-y special handling for skipping a right angle token at the end of * generic arguments. * Currently, this replaces the "current token" with one that is identical * except has the leading '>' removed (e.g. '>>' becomes '>'). 
This is bad * for several reasons - it modifies the token stream to something that * actually doesn't make syntactic sense, it may not worked if the token * has already been skipped, etc. It was done because it would not * actually require inserting new items into the token stream (which I * thought would take more work to not mess up) and because I wasn't sure * if the "already seen right angle" flag in the parser would work * correctly. * Those two other approaches listed are in my opinion actually better * long-term - insertion is probably best as it reflects syntactically * what occurs. On the other hand, I need to do a code audit to make sure * that insertion doesn't mess anything up. So that's a FIXME. */ template bool Parser::skip_generics_right_angle () { /* OK, new great idea. Have a lexer method called * "split_current_token(TokenType newLeft, TokenType newRight)", which is * called here with whatever arguments are appropriate. That lexer method * handles "replacing" the current token with the "newLeft" and "inserting" * the next token with the "newRight" (and creating a location, etc. 
for it) */ /* HACK: special handling for right shift '>>', greater or equal '>=', and * right shift assig */ // '>>=' const_TokenPtr tok = lexer.peek_token (); switch (tok->get_id ()) { case RIGHT_ANGLE: // this is good - skip token lexer.skip_token (); return true; case RIGHT_SHIFT: { // new implementation that should be better lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE); lexer.skip_token (); return true; } case GREATER_OR_EQUAL: { // new implementation that should be better lexer.split_current_token (RIGHT_ANGLE, EQUAL); lexer.skip_token (); return true; } case RIGHT_SHIFT_EQ: { // new implementation that should be better lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL); lexer.skip_token (); return true; } default: add_error (Error (tok->get_locus (), "expected %<>%> at end of generic argument - found %qs", tok->get_token_description ())); return false; } } /* Gets left binding power for specified token. * Not suitable for use at the moment or possibly ever because binding power * cannot be purely determined from operator token with Rust grammar - e.g. * method call and field access have * different left binding powers but the same operator token. */ template int Parser::left_binding_power (const_TokenPtr token) { // HACK: called with "peek_token()", so lookahead is "peek_token(1)" switch (token->get_id ()) { /* TODO: issue here - distinguish between method calls and field access * somehow? Also would have to distinguish between paths and function * calls (:: operator), maybe more stuff. */ /* Current plan for tackling LBP - don't do it based on token, use * lookahead. Or alternatively, only use Pratt parsing for OperatorExpr * and handle other expressions without it. rustc only considers * arithmetic, logical/relational, 'as', * '?=', ranges, colons, and assignment to have operator precedence and * associativity rules applicable. It then has * a separate "ExprPrecedence" that also includes binary operators. 
*/ // TODO: handle operator overloading - have a function replace the // operator? /*case DOT: return LBP_DOT;*/ case SCOPE_RESOLUTION: rust_debug ( "possible error - looked up LBP of scope resolution operator. should " "be handled elsewhere."); return LBP_PATH; /* Resolved by lookahead HACK that should work with current code. If next * token is identifier and token after that isn't parenthesised expression * list, it is a field reference. */ case DOT: if (lexer.peek_token (1)->get_id () == IDENTIFIER && lexer.peek_token (2)->get_id () != LEFT_PAREN) { return LBP_FIELD_EXPR; } return LBP_METHOD_CALL; case LEFT_PAREN: return LBP_FUNCTION_CALL; case LEFT_SQUARE: return LBP_ARRAY_REF; // postfix question mark (i.e. error propagation expression) case QUESTION_MARK: return LBP_QUESTION_MARK; case AS: return LBP_AS; case ASTERISK: return LBP_MUL; case DIV: return LBP_DIV; case PERCENT: return LBP_MOD; case PLUS: return LBP_PLUS; case MINUS: return LBP_MINUS; case LEFT_SHIFT: return LBP_L_SHIFT; case RIGHT_SHIFT: return LBP_R_SHIFT; // binary & operator case AMP: return LBP_AMP; // binary ^ operator case CARET: return LBP_CARET; // binary | operator case PIPE: return LBP_PIPE; case EQUAL_EQUAL: return LBP_EQUAL; case NOT_EQUAL: return LBP_NOT_EQUAL; case RIGHT_ANGLE: return LBP_GREATER_THAN; case GREATER_OR_EQUAL: return LBP_GREATER_EQUAL; case LEFT_ANGLE: return LBP_SMALLER_THAN; case LESS_OR_EQUAL: return LBP_SMALLER_EQUAL; case LOGICAL_AND: return LBP_LOGICAL_AND; case OR: return LBP_LOGICAL_OR; case DOT_DOT: return LBP_DOT_DOT; case DOT_DOT_EQ: return LBP_DOT_DOT_EQ; case EQUAL: return LBP_ASSIG; case PLUS_EQ: return LBP_PLUS_ASSIG; case MINUS_EQ: return LBP_MINUS_ASSIG; case ASTERISK_EQ: return LBP_MULT_ASSIG; case DIV_EQ: return LBP_DIV_ASSIG; case PERCENT_EQ: return LBP_MOD_ASSIG; case AMP_EQ: return LBP_AMP_ASSIG; case PIPE_EQ: return LBP_PIPE_ASSIG; case CARET_EQ: return LBP_CARET_ASSIG; case LEFT_SHIFT_EQ: return LBP_L_SHIFT_ASSIG; case RIGHT_SHIFT_EQ: return 
LBP_R_SHIFT_ASSIG; /* HACK: float literal due to lexer misidentifying a dot then an integer as * a float */ case FLOAT_LITERAL: return LBP_FIELD_EXPR; // field expr is same as tuple expr in precedence, i imagine // TODO: is this needed anymore? lexer shouldn't do that anymore // anything that can't appear in an infix position is given lowest priority default: return LBP_LOWEST; } } // Returns true when current token is EOF. template bool Parser::done_end_of_file () { return lexer.peek_token ()->get_id () == END_OF_FILE; } // Parses a sequence of items within a module or the implicit top-level module // in a crate template std::vector> Parser::parse_items () { std::vector> items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != END_OF_FILE) { std::unique_ptr item = parse_item (false); if (item == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse item in crate"); add_error (std::move (error)); // TODO: should all items be cleared? items = std::vector> (); break; } items.push_back (std::move (item)); t = lexer.peek_token (); } return items; } // Parses a crate (compilation unit) - entry point template std::unique_ptr Parser::parse_crate () { // parse inner attributes AST::AttrVec inner_attrs = parse_inner_attributes (); // parse items std::vector> items = parse_items (); // emit all errors for (const auto &error : error_table) error.emit_error (); return std::unique_ptr ( new AST::Crate (std::move (items), std::move (inner_attrs))); } // Parse a contiguous block of inner attributes. template AST::AttrVec Parser::parse_inner_attributes () { AST::AttrVec inner_attributes; // only try to parse it if it starts with "#!" 
not only "#" while ((lexer.peek_token ()->get_id () == HASH && lexer.peek_token (1)->get_id () == EXCLAM) || lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) { AST::Attribute inner_attr = parse_inner_attribute (); /* Ensure only valid inner attributes are added to the inner_attributes * list */ if (!inner_attr.is_empty ()) { inner_attributes.push_back (std::move (inner_attr)); } else { /* If no more valid inner attributes, break out of loop (only * contiguous inner attributes parsed). */ break; } } inner_attributes.shrink_to_fit (); return inner_attributes; } // Parse a inner or outer doc comment into an doc attribute template AST::Attribute Parser::parse_doc_comment () { const_TokenPtr token = lexer.peek_token (); Location locus = token->get_locus (); AST::SimplePathSegment segment ("doc", locus); std::vector segments; segments.push_back (std::move (segment)); AST::SimplePath attr_path (std::move (segments), false, locus); AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING, PrimitiveCoreType::CORETYPE_STR, {}, locus); std::unique_ptr attr_input ( new AST::AttrInputLiteral (std::move (lit_expr))); lexer.skip_token (); return AST::Attribute (std::move (attr_path), std::move (attr_input), locus); } // Parse a single inner attribute. 
template AST::Attribute Parser::parse_inner_attribute () { if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) return parse_doc_comment (); if (lexer.peek_token ()->get_id () != HASH) { Error error (lexer.peek_token ()->get_locus (), "BUG: token %<#%> is missing, but % " "was invoked"); add_error (std::move (error)); return AST::Attribute::create_empty (); } lexer.skip_token (); if (lexer.peek_token ()->get_id () != EXCLAM) { Error error (lexer.peek_token ()->get_locus (), "expected % or %<[%> for inner attribute"); add_error (std::move (error)); return AST::Attribute::create_empty (); } lexer.skip_token (); if (!skip_token (LEFT_SQUARE)) return AST::Attribute::create_empty (); AST::Attribute actual_attribute = parse_attribute_body (); if (!skip_token (RIGHT_SQUARE)) return AST::Attribute::create_empty (); return actual_attribute; } // Parses the body of an attribute (inner or outer). template AST::Attribute Parser::parse_attribute_body () { Location locus = lexer.peek_token ()->get_locus (); AST::SimplePath attr_path = parse_simple_path (); // ensure path is valid to parse attribute input if (attr_path.is_empty ()) { Error error (lexer.peek_token ()->get_locus (), "empty simple path in attribute"); add_error (std::move (error)); // Skip past potential further info in attribute (i.e. attr_input) skip_after_end_attribute (); return AST::Attribute::create_empty (); } std::unique_ptr attr_input = parse_attr_input (); // AttrInput is allowed to be null, so no checks here return AST::Attribute (std::move (attr_path), std::move (attr_input), locus); } /* Determines whether token is a valid simple path segment. This does not * include scope resolution operators. */ inline bool is_simple_path_segment (TokenId id) { switch (id) { case IDENTIFIER: case SUPER: case SELF: case CRATE: return true; case DOLLAR_SIGN: // assume that dollar sign leads to $crate return true; default: return false; } } // Parses a SimplePath AST node, if it exists. Does nothing otherwise. 
template AST::SimplePath Parser::parse_simple_path () { bool has_opening_scope_resolution = false; Location locus = Linemap::unknown_location (); // don't parse anything if not a path upfront if (!is_simple_path_segment (lexer.peek_token ()->get_id ()) && !is_simple_path_segment (lexer.peek_token (1)->get_id ())) return AST::SimplePath::create_empty (); /* Checks for opening scope resolution (i.e. global scope fully-qualified * path) */ if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) { has_opening_scope_resolution = true; locus = lexer.peek_token ()->get_locus (); lexer.skip_token (); } // Parse single required simple path segment AST::SimplePathSegment segment = parse_simple_path_segment (); // get location if not gotten already if (locus == Linemap::unknown_location ()) locus = segment.get_locus (); std::vector segments; // Return empty vector if first, actually required segment is an error if (segment.is_error ()) return AST::SimplePath::create_empty (); segments.push_back (std::move (segment)); // Parse all other simple path segments while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) { // Skip scope resolution operator lexer.skip_token (); AST::SimplePathSegment new_segment = parse_simple_path_segment (); // Return path as currently constructed if segment in error state. if (new_segment.is_error ()) break; segments.push_back (std::move (new_segment)); } // DEBUG: check for any empty segments for (const auto &seg : segments) { if (seg.is_error ()) { rust_debug ( "when parsing simple path, somehow empty path segment was " "not filtered out. 
Path begins with '%s'", segments.at (0).as_string ().c_str ()); } } return AST::SimplePath (std::move (segments), has_opening_scope_resolution, locus); /* TODO: now that is_simple_path_segment exists, could probably start * actually making errors upon parse failure of segments and whatever */ } /* Parses a single SimplePathSegment (does not handle the scope resolution * operators) */ template AST::SimplePathSegment Parser::parse_simple_path_segment () { const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: lexer.skip_token (); return AST::SimplePathSegment (t->get_str (), t->get_locus ()); case SUPER: lexer.skip_token (); return AST::SimplePathSegment ("super", t->get_locus ()); case SELF: lexer.skip_token (); return AST::SimplePathSegment ("self", t->get_locus ()); case CRATE: lexer.skip_token (); return AST::SimplePathSegment ("crate", t->get_locus ()); case DOLLAR_SIGN: if (lexer.peek_token (1)->get_id () == CRATE) { lexer.skip_token (1); return AST::SimplePathSegment ("$crate", t->get_locus ()); } gcc_fallthrough (); default: // do nothing but inactivates warning from gcc when compiling /* could put the rust_error_at thing here but fallthrough (from failing * $crate condition) isn't completely obvious if it is. */ // test prevent error return AST::SimplePathSegment::create_error (); } gcc_unreachable (); /*rust_error_at( t->get_locus(), "invalid token '%s' in simple path segment", t->get_token_description());*/ // this is not necessarily an error, e.g. end of path // return AST::SimplePathSegment::create_error(); } // Parses a PathIdentSegment - an identifier segment of a non-SimplePath path. 
template AST::PathIdentSegment Parser::parse_path_ident_segment () { const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: lexer.skip_token (); return AST::PathIdentSegment (t->get_str (), t->get_locus ()); case SUPER: lexer.skip_token (); return AST::PathIdentSegment ("super", t->get_locus ()); case SELF: lexer.skip_token (); return AST::PathIdentSegment ("self", t->get_locus ()); case SELF_ALIAS: lexer.skip_token (); return AST::PathIdentSegment ("Self", t->get_locus ()); case CRATE: lexer.skip_token (); return AST::PathIdentSegment ("crate", t->get_locus ()); case DOLLAR_SIGN: if (lexer.peek_token (1)->get_id () == CRATE) { lexer.skip_token (1); return AST::PathIdentSegment ("$crate", t->get_locus ()); } gcc_fallthrough (); default: /* do nothing but inactivates warning from gcc when compiling * could put the error_at thing here but fallthrough (from failing $crate * condition) isn't completely obvious if it is. */ // test prevent error return AST::PathIdentSegment::create_error (); } gcc_unreachable (); // not necessarily an error } // Parses an AttrInput AST node (polymorphic, as AttrInput is abstract) template std::unique_ptr Parser::parse_attr_input () { const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_PAREN: case LEFT_SQUARE: case LEFT_CURLY: { // must be a delimited token tree, so parse that std::unique_ptr input_tree ( new AST::DelimTokenTree (parse_delim_token_tree ())); // TODO: potential checks on DelimTokenTree before returning return input_tree; } case EQUAL: { // = LiteralExpr lexer.skip_token (); t = lexer.peek_token (); /* Ensure token is a "literal expression" (literally only a literal * token of any type) */ if (!t->is_literal ()) { Error error ( t->get_locus (), "unknown token %qs in attribute body - literal expected", t->get_token_description ()); add_error (std::move (error)); skip_after_end_attribute (); return nullptr; } AST::Literal::LitType lit_type = AST::Literal::STRING; // Crappy 
mapping of token type to literal type switch (t->get_id ()) { case INT_LITERAL: lit_type = AST::Literal::INT; break; case FLOAT_LITERAL: lit_type = AST::Literal::FLOAT; break; case CHAR_LITERAL: lit_type = AST::Literal::CHAR; break; case BYTE_CHAR_LITERAL: lit_type = AST::Literal::BYTE; break; case BYTE_STRING_LITERAL: lit_type = AST::Literal::BYTE_STRING; break; case STRING_LITERAL: default: lit_type = AST::Literal::STRING; break; // TODO: raw string? don't eliminate it from lexer? } // create actual LiteralExpr AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (), {}, t->get_locus ()); lexer.skip_token (); std::unique_ptr attr_input_lit ( new AST::AttrInputLiteral (std::move (lit_expr))); // do checks or whatever? none required, really // FIXME: shouldn't a skip token be required here? return attr_input_lit; } break; case RIGHT_SQUARE: // means AttrInput is missing, which is allowed return nullptr; default: add_error ( Error (t->get_locus (), "unknown token %qs in attribute body - attribute input or " "none expected", t->get_token_description ())); skip_after_end_attribute (); return nullptr; } gcc_unreachable (); // TODO: find out how to stop gcc error on "no return value" } /* Returns true if the token id matches the delimiter type. Note that this only * operates for END delimiter tokens. */ inline bool token_id_matches_delims (TokenId token_id, AST::DelimType delim_type) { return ((token_id == RIGHT_PAREN && delim_type == AST::PARENS) || (token_id == RIGHT_SQUARE && delim_type == AST::SQUARE) || (token_id == RIGHT_CURLY && delim_type == AST::CURLY)); } /* Returns true if the likely result of parsing the next few tokens is a path. * Not guaranteed, though, especially in the case of syntax errors. */ inline bool is_likely_path_next (TokenId next_token_id) { switch (next_token_id) { case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: // maybe - maybe do extra check. But then requires another TokenId. 
case DOLLAR_SIGN: case SCOPE_RESOLUTION: return true; default: return false; } } // Parses a delimited token tree template AST::DelimTokenTree Parser::parse_delim_token_tree () { const_TokenPtr t = lexer.peek_token (); lexer.skip_token (); Location initial_loc = t->get_locus (); // save delim type to ensure it is reused later AST::DelimType delim_type = AST::PARENS; // Map tokens to DelimType switch (t->get_id ()) { case LEFT_PAREN: delim_type = AST::PARENS; break; case LEFT_SQUARE: delim_type = AST::SQUARE; break; case LEFT_CURLY: delim_type = AST::CURLY; break; default: add_error (Error (t->get_locus (), "unexpected token %qs - expecting delimiters (for a " "delimited token tree)", t->get_token_description ())); return AST::DelimTokenTree::create_empty (); } // parse actual token tree vector - 0 or more std::vector> token_trees_in_tree; auto delim_open = std::unique_ptr (new AST::Token (std::move (t))); token_trees_in_tree.push_back (std::move (delim_open)); // repeat loop until finding the matching delimiter t = lexer.peek_token (); while (!token_id_matches_delims (t->get_id (), delim_type) && t->get_id () != END_OF_FILE) { std::unique_ptr tok_tree = parse_token_tree (); if (tok_tree == nullptr) { // TODO: is this error handling appropriate? Error error ( t->get_locus (), "failed to parse token tree in delimited token tree - found %qs", t->get_token_description ()); add_error (std::move (error)); return AST::DelimTokenTree::create_empty (); } token_trees_in_tree.push_back (std::move (tok_tree)); // lexer.skip_token(); t = lexer.peek_token (); } auto delim_close = std::unique_ptr (new AST::Token (std::move (t))); token_trees_in_tree.push_back (std::move (delim_close)); AST::DelimTokenTree token_tree (delim_type, std::move (token_trees_in_tree), initial_loc); // parse end delimiters t = lexer.peek_token (); if (token_id_matches_delims (t->get_id (), delim_type)) { // tokens match opening delimiter, so skip. 
lexer.skip_token (); // DEBUG rust_debug ("finished parsing new delim token tree - peeked token is now " "'%s' while t is '%s'", lexer.peek_token ()->get_token_description (), t->get_token_description ()); return token_tree; } else { // tokens don't match opening delimiters, so produce error Error error (t->get_locus (), "unexpected token %qs - expecting closing delimiter %qs " "(for a delimited token tree)", t->get_token_description (), (delim_type == AST::PARENS ? ")" : (delim_type == AST::SQUARE ? "]" : "}"))); add_error (std::move (error)); /* return empty token tree despite possibly parsing valid token tree - * TODO is this a good idea? */ return AST::DelimTokenTree::create_empty (); } } /* Parses a TokenTree syntactical production. This is either a delimited token * tree or a non-delimiter token. */ template std::unique_ptr Parser::parse_token_tree () { const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_PAREN: case LEFT_SQUARE: case LEFT_CURLY: // Parse delimited token tree // TODO: use move rather than copy constructor return std::unique_ptr ( new AST::DelimTokenTree (parse_delim_token_tree ())); case RIGHT_PAREN: case RIGHT_SQUARE: case RIGHT_CURLY: // error - should not be called when this a token add_error ( Error (t->get_locus (), "unexpected closing delimiter %qs - token tree requires " "either paired delimiters or non-delimiter tokens", t->get_token_description ())); lexer.skip_token (); return nullptr; default: // parse token itself as TokenTree lexer.skip_token (); return std::unique_ptr (new AST::Token (std::move (t))); } } // Parses a single item template std::unique_ptr Parser::parse_item (bool called_from_statement) { // has a "called_from_statement" parameter for better error message handling // parse outer attributes for item AST::AttrVec outer_attrs = parse_outer_attributes (); // TODO: decide how to deal with VisItem vs MacroItem dichotomy /* best current solution: catch all keywords that would imply a VisItem in a * 
switch and have MacroItem as a last resort */ const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case END_OF_FILE: // not necessarily an error, unless we just read outer // attributes which needs to be attached if (!outer_attrs.empty ()) { Rust::AST::Attribute attr = outer_attrs.back (); Error error (attr.get_locus (), "expected item after outer attribute or doc comment"); add_error (std::move (error)); } return nullptr; case PUB: case MOD: case EXTERN_TOK: case USE: case FN_TOK: case TYPE: case STRUCT_TOK: case ENUM_TOK: case CONST: case STATIC_TOK: case TRAIT: case IMPL: /* TODO: implement union keyword but not really because of * context-dependence crappy hack way to parse a union written below to * separate it from the good code. */ // case UNION: case UNSAFE: // maybe - unsafe traits are a thing // if any of these (should be all possible VisItem prefixes), parse a // VisItem return parse_vis_item (std::move (outer_attrs)); break; case SUPER: case SELF: case CRATE: case DOLLAR_SIGN: // almost certainly macro invocation semi return parse_macro_item (std::move (outer_attrs)); break; // crappy hack to do union "keyword" case IDENTIFIER: // TODO: ensure std::string and literal comparison works if (t->get_str () == "union" && lexer.peek_token (1)->get_id () == IDENTIFIER) { return parse_vis_item (std::move (outer_attrs)); // or should this go straight to parsing union? } else if (t->get_str () == "macro_rules") { // macro_rules! 
macro item return parse_macro_item (std::move (outer_attrs)); } else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION || lexer.peek_token (1)->get_id () == EXCLAM) { /* path (probably) or macro invocation, so probably a macro invocation * semi */ return parse_macro_item (std::move (outer_attrs)); } gcc_fallthrough (); default: // otherwise unrecognised // return parse_macro_item(std::move(outer_attrs)); add_error (Error (t->get_locus (), "unrecognised token %qs for start of %s", t->get_token_description (), called_from_statement ? "statement" : "item")); // skip somewhere? return nullptr; break; } } // Parses a contiguous block of outer attributes. template AST::AttrVec Parser::parse_outer_attributes () { AST::AttrVec outer_attributes; while (lexer.peek_token ()->get_id () == HASH /* Can also be #!, which catches errors. */ || lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT || lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) /* For error handling. */ { AST::Attribute outer_attr = parse_outer_attribute (); /* Ensure only valid outer attributes are added to the outer_attributes * list */ if (!outer_attr.is_empty ()) { outer_attributes.push_back (std::move (outer_attr)); } else { /* If no more valid outer attributes, break out of loop (only * contiguous outer attributes parsed). */ break; } } outer_attributes.shrink_to_fit (); return outer_attributes; /* TODO: this shares basically all code with parse_inner_attributes except * function call - find way of making it more modular? function pointer? */ } // Parse a single outer attribute. 
template AST::Attribute Parser::parse_outer_attribute () { if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT) return parse_doc_comment (); if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) { Error error ( lexer.peek_token ()->get_locus (), "inner doc (% or %) only allowed at start of item " "and before any outer attribute or doc (%<#[%>, % or %)"); add_error (std::move (error)); lexer.skip_token (); return AST::Attribute::create_empty (); } /* OuterAttribute -> '#' '[' Attr ']' */ if (lexer.peek_token ()->get_id () != HASH) return AST::Attribute::create_empty (); lexer.skip_token (); TokenId id = lexer.peek_token ()->get_id (); if (id != LEFT_SQUARE) { if (id == EXCLAM) { // this is inner attribute syntax, so throw error // inner attributes were either already parsed or not allowed here. Error error ( lexer.peek_token ()->get_locus (), "token % found, indicating inner attribute definition. Inner " "attributes are not possible at this location"); add_error (std::move (error)); } return AST::Attribute::create_empty (); } lexer.skip_token (); AST::Attribute actual_attribute = parse_attribute_body (); if (lexer.peek_token ()->get_id () != RIGHT_SQUARE) return AST::Attribute::create_empty (); lexer.skip_token (); return actual_attribute; } // Parses a VisItem (item that can have non-default visibility). 
/* NOTE(review): angle-bracket text (template parameter lists, template
 * arguments and some %<...%> diagnostic quoting) looks stripped from this copy
 * of the file, e.g. "std::vector> token_trees" below.  The code tokens are
 * kept exactly as found and need restoring before this can compile.  */

/* Parses a VisItem.
 *
 * First parses the (possibly absent) visibility with parse_visibility (),
 * then dispatches on the next token to the matching item parser, passing the
 * visibility and outer attributes along.  EXTERN_TOK looks one or two tokens
 * further ahead, CONST and UNSAFE one token further.
 *
 * Returns nullptr in two different situations:
 *  - the lookahead after extern/const/unsafe is not recognised: an error is
 *    recorded and lexer.skip_token (n) is called;
 *  - the next token does not start any VisItem: no error is recorded.  */
template std::unique_ptr Parser::parse_vis_item (AST::AttrVec outer_attrs)
{
  // parse visibility, which may or may not exist
  AST::Visibility vis = parse_visibility ();

  // select VisItem to create depending on keyword
  const_TokenPtr t = lexer.peek_token ();

  switch (t->get_id ())
    {
    case MOD:
      return parse_module (std::move (vis), std::move (outer_attrs));
    case EXTERN_TOK:
      // lookahead to resolve syntactical production
      t = lexer.peek_token (1);

      switch (t->get_id ())
        {
        case CRATE:
          return parse_extern_crate (std::move (vis), std::move (outer_attrs));
        case FN_TOK: // extern function
          return parse_function (std::move (vis), std::move (outer_attrs));
        case LEFT_CURLY: // extern block
          return parse_extern_block (std::move (vis), std::move (outer_attrs));
        case STRING_LITERAL: // for specifying extern ABI
          // could be extern block or extern function, so more lookahead
          t = lexer.peek_token (2);

          switch (t->get_id ())
            {
            case FN_TOK:
              return parse_function (std::move (vis), std::move (outer_attrs));
            case LEFT_CURLY:
              return parse_extern_block (std::move (vis),
                                         std::move (outer_attrs));
            default:
              add_error (
                Error (t->get_locus (),
                       "unexpected token %qs in some sort of extern production",
                       t->get_token_description ()));

              lexer.skip_token (2); // TODO: is this right thing to do?
              return nullptr;
            }
        default:
          add_error (
            Error (t->get_locus (),
                   "unexpected token %qs in some sort of extern production",
                   t->get_token_description ()));

          lexer.skip_token (1); // TODO: is this right thing to do?
          return nullptr;
        }
    case USE:
      return parse_use_decl (std::move (vis), std::move (outer_attrs));
    case FN_TOK:
      return parse_function (std::move (vis), std::move (outer_attrs));
    case TYPE:
      return parse_type_alias (std::move (vis), std::move (outer_attrs));
    case STRUCT_TOK:
      return parse_struct (std::move (vis), std::move (outer_attrs));
    case ENUM_TOK:
      return parse_enum (std::move (vis), std::move (outer_attrs));
    // TODO: implement union keyword but not really because of
    // context-dependence
    // case UNION:
    // crappy hack to do union "keyword"
    case IDENTIFIER:
      // "union" arrives as a plain identifier; it is only treated as the
      // keyword when another identifier follows it.
      if (t->get_str () == "union"
          && lexer.peek_token (1)->get_id () == IDENTIFIER)
        {
          return parse_union (std::move (vis), std::move (outer_attrs));
          // or should item switch go straight to parsing union?
        }
      else
        {
          break;
        }
    case CONST:
      // lookahead to resolve syntactical production
      t = lexer.peek_token (1);

      switch (t->get_id ())
        {
        case IDENTIFIER:
        case UNDERSCORE:
          return parse_const_item (std::move (vis), std::move (outer_attrs));
        case UNSAFE:
        case EXTERN_TOK:
        case FN_TOK:
          return parse_function (std::move (vis), std::move (outer_attrs));
        default:
          add_error (
            Error (t->get_locus (),
                   "unexpected token %qs in some sort of const production",
                   t->get_token_description ()));

          lexer.skip_token (1); // TODO: is this right thing to do?
          return nullptr;
        }
    case STATIC_TOK:
      return parse_static_item (std::move (vis), std::move (outer_attrs));
    case TRAIT:
      return parse_trait (std::move (vis), std::move (outer_attrs));
    case IMPL:
      return parse_impl (std::move (vis), std::move (outer_attrs));
    case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls),
      // lookahead to resolve syntactical production
      t = lexer.peek_token (1);

      switch (t->get_id ())
        {
        case TRAIT:
          return parse_trait (std::move (vis), std::move (outer_attrs));
        case EXTERN_TOK:
        case FN_TOK:
          return parse_function (std::move (vis), std::move (outer_attrs));
        case IMPL:
          return parse_impl (std::move (vis), std::move (outer_attrs));
        default:
          add_error (
            Error (t->get_locus (),
                   "unexpected token %qs in some sort of unsafe production",
                   t->get_token_description ()));

          lexer.skip_token (1); // TODO: is this right thing to do?
          return nullptr;
        }
    default:
      // otherwise vis item clearly doesn't exist, which is not an error
      // has a catch-all post-switch return to allow other breaks to occur
      break;
    }
  return nullptr;
}

/* Parses a MacroItem (either a MacroInvocationSemi or MacroRulesDefinition).
 *
 * If the next token is the identifier "macro_rules" the work is handed to
 * parse_macro_rules_def (); anything else goes to
 * parse_macro_invocation_semi () after emitting debug output.  */
template std::unique_ptr Parser::parse_macro_item (AST::AttrVec outer_attrs)
{
  const_TokenPtr t = lexer.peek_token ();

  /* dodgy way of detecting macro due to weird context-dependence thing.
   * probably can be improved */
  // TODO: ensure that string compare works properly
  if (t->get_id () == IDENTIFIER && t->get_str () == "macro_rules")
    {
      return parse_macro_rules_def (std::move (outer_attrs));
    }
  else
    {
      // DEBUG: TODO: remove
      rust_debug (
        "DEBUG - parse_macro_item called and token is not macro_rules");
      if (t->get_id () == IDENTIFIER)
        {
          rust_debug ("just add to last error: token is not macro_rules and is "
                      "instead '%s'",
                      t->get_str ().c_str ());
        }
      else
        {
          rust_debug ("just add to last error: token is not macro_rules and is "
                      "not an identifier either - it is '%s'",
                      t->get_token_description ());
        }

      return parse_macro_invocation_semi (std::move (outer_attrs));
    }
}

/* Parses a macro rules definition.
 *
 * Expects, in order: the identifier "macro_rules", '!', the macro name, an
 * opening delimiter, at least one macro rule, further rules separated by
 * semicolons (a semicolon directly before the closing delimiter is accepted),
 * and the closing delimiter matching the opening one.  For non-curly
 * delimiters a following semicolon is also skipped; the definition is
 * returned whether or not that semicolon was present.
 *
 * Returns nullptr if any earlier step fails.  */
template std::unique_ptr Parser::parse_macro_rules_def (AST::AttrVec outer_attrs)
{
  // ensure that first token is identifier saying "macro_rules"
  const_TokenPtr t = lexer.peek_token ();
  if (t->get_id () != IDENTIFIER || t->get_str () != "macro_rules")
    {
      Error error (
        t->get_locus (),
        "macro rules definition does not start with %");
      add_error (std::move (error));

      // skip after somewhere?
      return nullptr;
    }
  lexer.skip_token ();
  Location macro_locus = t->get_locus ();

  if (!skip_token (EXCLAM))
    {
      // skip after somewhere?
      return nullptr;
    }

  // parse macro name
  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
  if (ident_tok == nullptr)
    {
      return nullptr;
    }
  Identifier rule_name = ident_tok->get_str ();

  // DEBUG
  rust_debug ("in macro rules def, about to parse parens.");

  // save delim type to ensure it is reused later
  AST::DelimType delim_type = AST::PARENS;

  // Map tokens to DelimType
  t = lexer.peek_token ();
  switch (t->get_id ())
    {
    case LEFT_PAREN:
      delim_type = AST::PARENS;
      break;
    case LEFT_SQUARE:
      delim_type = AST::SQUARE;
      break;
    case LEFT_CURLY:
      delim_type = AST::CURLY;
      break;
    default:
      add_error (Error (t->get_locus (),
                        "unexpected token %qs - expecting delimiters (for a "
                        "macro rules definition)",
                        t->get_token_description ()));

      return nullptr;
    }
  lexer.skip_token ();

  // parse actual macro rules
  std::vector macro_rules;

  // must be at least one macro rule, so parse it
  AST::MacroRule initial_rule = parse_macro_rule ();
  if (initial_rule.is_error ())
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "required first macro rule in macro rules definition "
                   "could not be parsed");
      add_error (std::move (error));

      // skip after somewhere?
      return nullptr;
    }
  macro_rules.push_back (std::move (initial_rule));

  // DEBUG
  rust_debug ("successfully pushed back initial macro rule");

  t = lexer.peek_token ();
  // parse macro rules
  while (t->get_id () == SEMICOLON)
    {
      // skip semicolon
      lexer.skip_token ();

      // don't parse if end of macro rules
      if (token_id_matches_delims (lexer.peek_token ()->get_id (), delim_type))
        {
          // DEBUG
          rust_debug (
            "broke out of parsing macro rules loop due to finding delim");

          break;
        }

      // try to parse next rule
      AST::MacroRule rule = parse_macro_rule ();
      if (rule.is_error ())
        {
          Error error (lexer.peek_token ()->get_locus (),
                       "failed to parse macro rule in macro rules definition");
          add_error (std::move (error));

          return nullptr;
        }

      macro_rules.push_back (std::move (rule));

      // DEBUG
      rust_debug ("successfully pushed back another macro rule");

      t = lexer.peek_token ();
    }

  // parse end delimiters
  t = lexer.peek_token ();
  if (token_id_matches_delims (t->get_id (), delim_type))
    {
      // tokens match opening delimiter, so skip.
      lexer.skip_token ();

      if (delim_type != AST::CURLY)
        {
          // skip semicolon at end of non-curly macro definitions
          if (!skip_token (SEMICOLON))
            {
              // as this is the end, allow recovery (probably) - may change
              return std::unique_ptr (
                new AST::MacroRulesDefinition (
                  std::move (rule_name), delim_type, std::move (macro_rules),
                  std::move (outer_attrs), macro_locus));
            }
        }

      return std::unique_ptr (
        new AST::MacroRulesDefinition (std::move (rule_name), delim_type,
                                       std::move (macro_rules),
                                       std::move (outer_attrs), macro_locus));
    }
  else
    {
      // tokens don't match opening delimiters, so produce error
      Error error (t->get_locus (),
                   "unexpected token %qs - expecting closing delimiter %qs "
                   "(for a macro rules definition)",
                   t->get_token_description (),
                   (delim_type == AST::PARENS
                      ? ")"
                      : (delim_type == AST::SQUARE ? "]" : "}")));
      add_error (std::move (error));

      /* return empty macro definiton despite possibly parsing mostly valid one
       * - TODO is this a good idea? */
      return nullptr;
    }
}

/* Parses a semi-coloned (except for full block) macro invocation item.
 *
 * Parses a simple path, '!', and a delimited token tree.  The opening and
 * closing delimiter tokens are stored as the first and last entries of the
 * token tree vector.  For non-curly delimiters a following semicolon is
 * skipped; the invocation is returned whether or not it was present.
 *
 * Returns nullptr if '!' or the opening delimiter is missing, if a token tree
 * fails to parse, or if the closing delimiter does not match.  */
template std::unique_ptr Parser::parse_macro_invocation_semi (
  AST::AttrVec outer_attrs)
{
  Location macro_locus = lexer.peek_token ()->get_locus ();
  AST::SimplePath path = parse_simple_path ();

  if (!skip_token (EXCLAM))
    {
      // skip after somewhere?
      return nullptr;
    }

  // save delim type to ensure it is reused later
  AST::DelimType delim_type = AST::PARENS;

  // Map tokens to DelimType
  const_TokenPtr t = lexer.peek_token ();
  switch (t->get_id ())
    {
    case LEFT_PAREN:
      delim_type = AST::PARENS;
      break;
    case LEFT_SQUARE:
      delim_type = AST::SQUARE;
      break;
    case LEFT_CURLY:
      delim_type = AST::CURLY;
      break;
    default:
      add_error (Error (t->get_locus (),
                        "unexpected token %qs - expecting delimiters (for a "
                        "macro invocation semi body)",
                        t->get_token_description ()));

      return nullptr;
    }
  Location tok_tree_locus = t->get_locus ();
  lexer.skip_token ();

  // parse actual token trees
  std::vector> token_trees;
  auto delim_open
    = std::unique_ptr (new AST::Token (std::move (t)));
  token_trees.push_back (std::move (delim_open));

  t = lexer.peek_token ();
  // parse token trees until the initial delimiter token is found again
  while (!token_id_matches_delims (t->get_id (), delim_type))
    {
      std::unique_ptr tree = parse_token_tree ();

      if (tree == nullptr)
        {
          Error error (t->get_locus (),
                       "failed to parse token tree for macro invocation semi "
                       "- found %qs",
                       t->get_token_description ());
          add_error (std::move (error));

          return nullptr;
        }

      token_trees.push_back (std::move (tree));

      t = lexer.peek_token ();
    }
  // t is the (not yet consumed) closing delimiter here; it is re-peeked below
  // after being moved into the token tree.
  auto delim_close
    = std::unique_ptr (new AST::Token (std::move (t)));
  token_trees.push_back (std::move (delim_close));

  AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees),
                                      tok_tree_locus);
  AST::MacroInvocData invoc_data (std::move (path), std::move (delim_tok_tree));

  // parse end delimiters
  t = lexer.peek_token ();
  if (token_id_matches_delims (t->get_id (), delim_type))
    {
      // tokens match opening delimiter, so skip.
      lexer.skip_token ();

      if (delim_type != AST::CURLY)
        {
          // skip semicolon at end of non-curly macro invocation semis
          if (!skip_token (SEMICOLON))
            {
              // as this is the end, allow recovery (probably) - may change
              return std::unique_ptr (
                new AST::MacroInvocation (std::move (invoc_data),
                                          std::move (outer_attrs), macro_locus,
                                          true));
            }
        }

      // DEBUG:
      rust_debug ("skipped token is '%s', next token (current peek) is '%s'",
                  t->get_token_description (),
                  lexer.peek_token ()->get_token_description ());

      return std::unique_ptr (
        new AST::MacroInvocation (std::move (invoc_data),
                                  std::move (outer_attrs), macro_locus, true));
    }
  else
    {
      // tokens don't match opening delimiters, so produce error
      Error error (t->get_locus (),
                   "unexpected token %qs - expecting closing delimiter %qs "
                   "(for a macro invocation semi)",
                   t->get_token_description (),
                   (delim_type == AST::PARENS
                      ? ")"
                      : (delim_type == AST::SQUARE ? "]" : "}")));
      add_error (std::move (error));

      /* return empty macro invocation despite possibly parsing mostly valid one
       * - TODO is this a good idea? */
      return nullptr;
    }
}

/* Parses a non-semicoloned macro invocation (i.e. as pattern or expression).
 *
 * Parses a non-empty simple path, '!', and a delimited token tree via
 * parse_delim_token_tree ().  The path's locus is used as the invocation's
 * locus.  Returns nullptr if the path is empty or '!' is missing.  */
template std::unique_ptr Parser::parse_macro_invocation (AST::AttrVec outer_attrs)
{
  // parse macro path
  AST::SimplePath macro_path = parse_simple_path ();
  if (macro_path.is_empty ())
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "failed to parse macro invocation path");
      add_error (std::move (error));

      // skip?
      return nullptr;
    }

  if (!skip_token (EXCLAM))
    {
      // skip after somewhere?
      return nullptr;
    }

  // parse internal delim token tree
  AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree ();

  Location macro_locus = macro_path.get_locus ();

  return std::unique_ptr (
    new AST::MacroInvocation (AST::MacroInvocData (std::move (macro_path),
                                                   std::move (delim_tok_tree)),
                              std::move (outer_attrs), macro_locus));
}

// Parses a macro rule definition - does not parse semicolons.
/* NOTE(review): angle-bracket text (template parameter lists, template
 * arguments such as the target type of the static_cast below, and some
 * %<...%> diagnostic quoting) looks stripped from this copy of the file.  The
 * code tokens are kept exactly as found and need restoring before this can
 * compile.  */

/* Parses one macro rule: a matcher, the MATCH_ARROW token, then a transcriber
 * (a delimited token tree).  Returns an error rule, carrying the locus of the
 * rule's first token, if the matcher is an error or the arrow is missing.  */
template AST::MacroRule Parser::parse_macro_rule ()
{
  Location locus = lexer.peek_token ()->get_locus ();

  // parse macro matcher
  AST::MacroMatcher matcher = parse_macro_matcher ();

  if (matcher.is_error ())
    return AST::MacroRule::create_error (locus);

  if (!skip_token (MATCH_ARROW))
    {
      // skip after somewhere?
      return AST::MacroRule::create_error (locus);
    }

  // parse transcriber (this is just a delim token tree)
  Location token_tree_loc = lexer.peek_token ()->get_locus ();
  AST::MacroTranscriber transcriber (parse_delim_token_tree (), token_tree_loc);

  return AST::MacroRule (std::move (matcher), std::move (transcriber), locus);
}

/* Parses a macro matcher (part of a macro rule definition).
 *
 * Expects an opening delimiter, then macro matches up to the matching closing
 * delimiter.  Each newly parsed match is checked with is_match_compatible ()
 * against the previous match and against any matches still held in
 * last_matches.  That list is cleared whenever the previous match is not a
 * repetition whose operator differs from ONE_OR_MORE.
 *
 * Returns an error matcher if the opening delimiter is missing, a match fails
 * to parse, a compatibility check fails, or the closing delimiter does not
 * match.  */
template AST::MacroMatcher Parser::parse_macro_matcher ()
{
  // save delim type to ensure it is reused later
  AST::DelimType delim_type = AST::PARENS;

  // DEBUG
  rust_debug ("begun parsing macro matcher");

  // Map tokens to DelimType
  const_TokenPtr t = lexer.peek_token ();
  Location locus = t->get_locus ();
  switch (t->get_id ())
    {
    case LEFT_PAREN:
      delim_type = AST::PARENS;
      break;
    case LEFT_SQUARE:
      delim_type = AST::SQUARE;
      break;
    case LEFT_CURLY:
      delim_type = AST::CURLY;
      break;
    default:
      add_error (Error (
        t->get_locus (),
        "unexpected token %qs - expecting delimiters (for a macro matcher)",
        t->get_token_description ()));

      return AST::MacroMatcher::create_error (t->get_locus ());
    }
  lexer.skip_token ();

  // parse actual macro matches
  std::vector> matches;
  // Set of possible preceding macro matches to make sure follow-set
  // restrictions are respected.
  // TODO: Consider using std::reference_wrapper instead of raw pointers?
  std::vector last_matches;

  t = lexer.peek_token ();
  // parse token trees until the initial delimiter token is found again
  while (!token_id_matches_delims (t->get_id (), delim_type))
    {
      std::unique_ptr match = parse_macro_match ();

      if (match == nullptr)
        {
          Error error (
            t->get_locus (),
            "failed to parse macro match for macro matcher - found %qs",
            t->get_token_description ());
          add_error (std::move (error));

          return AST::MacroMatcher::create_error (t->get_locus ());
        }

      if (matches.size () > 0)
        {
          const auto *last_match = matches.back ().get ();

          // We want to check if we are dealing with a zeroable repetition
          bool zeroable = false;
          if (last_match->get_macro_match_type ()
              == AST::MacroMatch::MacroMatchType::Repetition)
            {
              auto repetition
                = static_cast (last_match);

              if (repetition->get_op ()
                  != AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE)
                zeroable = true;
            }

          // Earlier matches are only kept as candidates while the previous
          // match was zeroable.
          if (!zeroable)
            last_matches.clear ();

          last_matches.emplace_back (last_match);

          for (auto last : last_matches)
            if (!is_match_compatible (*last, *match))
              return AST::MacroMatcher::create_error (
                match->get_match_locus ());
        }

      matches.push_back (std::move (match));

      // DEBUG
      rust_debug ("pushed back a match in macro matcher");

      t = lexer.peek_token ();
    }

  // parse end delimiters
  t = lexer.peek_token ();
  if (token_id_matches_delims (t->get_id (), delim_type))
    {
      // tokens match opening delimiter, so skip.
      lexer.skip_token ();

      return AST::MacroMatcher (delim_type, std::move (matches), locus);
    }
  else
    {
      // tokens don't match opening delimiters, so produce error
      Error error (t->get_locus (),
                   "unexpected token %qs - expecting closing delimiter %qs "
                   "(for a macro matcher)",
                   t->get_token_description (),
                   (delim_type == AST::PARENS
                      ? ")"
                      : (delim_type == AST::SQUARE ? "]" : "}")));
      add_error (std::move (error));

      /* return error macro matcher despite possibly parsing mostly correct one?
       * TODO is this the best idea? */
      return AST::MacroMatcher::create_error (t->get_locus ());
    }
}

/* Parses a macro match (syntax match inside a matcher in a macro rule).
 *
 * Dispatch on the next token:
 *  - an opening delimiter starts a nested matcher;
 *  - DOLLAR_SIGN followed by an identifier or one of the listed keyword
 *    tokens is a fragment, DOLLAR_SIGN followed by LEFT_PAREN a repetition;
 *  - a closing delimiter is an error;
 *  - any other token is consumed and returned as a single-token match.
 *
 * Returns nullptr on error.  */
template std::unique_ptr Parser::parse_macro_match ()
{
  // branch based on token available
  const_TokenPtr t = lexer.peek_token ();
  switch (t->get_id ())
    {
    case LEFT_PAREN:
    case LEFT_SQUARE:
      case LEFT_CURLY: {
        // must be macro matcher as delimited
        AST::MacroMatcher matcher = parse_macro_matcher ();
        if (matcher.is_error ())
          {
            Error error (lexer.peek_token ()->get_locus (),
                         "failed to parse macro matcher in macro match");
            add_error (std::move (error));

            return nullptr;
          }
        return std::unique_ptr (
          new AST::MacroMatcher (std::move (matcher)));
      }
      case DOLLAR_SIGN: {
        // have to do more lookahead to determine if fragment or repetition
        const_TokenPtr t2 = lexer.peek_token (1);
        switch (t2->get_id ())
          {
          case ABSTRACT:
          case AS:
          case ASYNC:
          case BECOME:
          case BOX:
          case BREAK:
          case CONST:
          case CONTINUE:
          case CRATE:
          case DO:
          case DYN:
          case ELSE:
          case ENUM_TOK:
          case EXTERN_TOK:
          case FALSE_LITERAL:
          case FINAL_TOK:
          case FN_TOK:
          case FOR:
          case IF:
          case IMPL:
          case IN:
          case LET:
          case LOOP:
          case MACRO:
          case MATCH_TOK:
          case MOD:
          case MOVE:
          case MUT:
          case OVERRIDE_TOK:
          case PRIV:
          case PUB:
          case REF:
          case RETURN_TOK:
          case SELF_ALIAS:
          case SELF:
          case STATIC_TOK:
          case STRUCT_TOK:
          case SUPER:
          case TRAIT:
          case TRUE_LITERAL:
          case TRY:
          case TYPE:
          case TYPEOF:
          case UNSAFE:
          case UNSIZED:
          case USE:
          case VIRTUAL:
          case WHERE:
          case WHILE:
          case YIELD:
          case IDENTIFIER:
            // macro fragment
            return parse_macro_match_fragment ();
          case LEFT_PAREN:
            // macro repetition
            return parse_macro_match_repetition ();
          default:
            // error: unrecognised
            add_error (
              Error (t2->get_locus (),
                     "unrecognised token combination %<$%s%> at start of "
                     "macro match - did you mean %<$identifier%> or %<$(%>?",
                     t2->get_token_description ()));

            // skip somewhere?
            return nullptr;
          }
      }
    case RIGHT_PAREN:
    case RIGHT_SQUARE:
    case RIGHT_CURLY:
      // not allowed
      add_error (Error (
        t->get_locus (),
        "closing delimiters like %qs are not allowed at the start of a macro "
        "match",
        t->get_token_description ()));

      // skip somewhere?
      return nullptr;
    default:
      // just the token
      lexer.skip_token ();
      return std::unique_ptr (new AST::Token (std::move (t)));
    }
}

/* Parses a fragment macro match: '$', a name, ':', then a fragment specifier
 * identifier.
 *
 * The name is the token's string when it has one, otherwise the result of
 * token_id_to_str () on its id.  Returns nullptr if the name is empty, ':' or
 * the specifier identifier is missing, or the specifier is not accepted by
 * AST::MacroFragSpec::get_frag_spec_from_str ().  */
template std::unique_ptr Parser::parse_macro_match_fragment ()
{
  Location fragment_locus = lexer.peek_token ()->get_locus ();
  skip_token (DOLLAR_SIGN);

  Identifier ident = "";
  auto identifier = lexer.peek_token ();
  if (identifier->has_str ())
    ident = identifier->get_str ();
  else
    ident = std::string (token_id_to_str (identifier->get_id ()));

  if (ident.empty ())
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "missing identifier in macro match fragment");
      add_error (std::move (error));

      return nullptr;
    }
  // Consume the name token, whatever its id turned out to be.
  skip_token (identifier->get_id ());

  if (!skip_token (COLON))
    {
      // skip after somewhere?
      return nullptr;
    }

  // get MacroFragSpec for macro
  const_TokenPtr t = expect_token (IDENTIFIER);
  if (t == nullptr)
    return nullptr;

  AST::MacroFragSpec frag
    = AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ());
  if (frag.is_error ())
    {
      Error error (t->get_locus (),
                   "invalid fragment specifier %qs in fragment macro match",
                   t->get_str ().c_str ());
      add_error (std::move (error));

      return nullptr;
    }

  return std::unique_ptr (
    new AST::MacroMatchFragment (std::move (ident), frag, fragment_locus));
}

/* Parses a repetition macro match: '$' '(' one or more macro matches ')',
 * an optional separator token, then a repetition operator ('*', '+' or '?').
 *
 * A token directly after ')' counts as the separator unless it is a
 * repetition operator or a delimiter.  The locus given to the resulting node
 * is that of the repetition-operator token.  Returns nullptr on error.  */
template std::unique_ptr Parser::parse_macro_match_repetition ()
{
  skip_token (DOLLAR_SIGN);
  skip_token (LEFT_PAREN);

  std::vector> matches;

  // parse required first macro match
  std::unique_ptr initial_match = parse_macro_match ();
  if (initial_match == nullptr)
    {
      Error error (
        lexer.peek_token ()->get_locus (),
        "could not parse required first macro match in macro match repetition");
      add_error (std::move (error));

      // skip after somewhere?
      return nullptr;
    }
  matches.push_back (std::move (initial_match));

  // parse optional later macro matches
  const_TokenPtr t = lexer.peek_token ();
  while (t->get_id () != RIGHT_PAREN)
    {
      std::unique_ptr match = parse_macro_match ();

      if (match == nullptr)
        {
          Error error (lexer.peek_token ()->get_locus (),
                       "failed to parse macro match in macro match repetition");
          add_error (std::move (error));

          return nullptr;
        }

      matches.push_back (std::move (match));

      t = lexer.peek_token ();
    }

  if (!skip_token (RIGHT_PAREN))
    {
      // skip after somewhere?
      return nullptr;
    }

  t = lexer.peek_token ();
  // see if separator token exists
  std::unique_ptr separator = nullptr;
  switch (t->get_id ())
    {
    // repetition operators
    case ASTERISK:
    case PLUS:
    case QUESTION_MARK:
    // delimiters
    case LEFT_PAREN:
    case LEFT_CURLY:
    case LEFT_SQUARE:
    case RIGHT_PAREN:
    case RIGHT_CURLY:
    case RIGHT_SQUARE:
      // separator does not exist, so still null and don't skip token
      break;
    default:
      // separator does exist
      separator = std::unique_ptr (new AST::Token (std::move (t)));
      lexer.skip_token ();
      break;
    }

  // parse repetition operator
  t = lexer.peek_token ();
  AST::MacroMatchRepetition::MacroRepOp op = AST::MacroMatchRepetition::NONE;
  switch (t->get_id ())
    {
    case ASTERISK:
      op = AST::MacroMatchRepetition::ANY;
      lexer.skip_token ();
      break;
    case PLUS:
      op = AST::MacroMatchRepetition::ONE_OR_MORE;
      lexer.skip_token ();
      break;
    case QUESTION_MARK:
      op = AST::MacroMatchRepetition::ZERO_OR_ONE;
      lexer.skip_token ();
      break;
    default:
      add_error (
        Error (t->get_locus (),
               "expected macro repetition operator (%<*%>, %<+%>, or %) in "
               "macro match - found %qs",
               t->get_token_description ()));

      // skip after somewhere?
      return nullptr;
    }

  return std::unique_ptr (
    new AST::MacroMatchRepetition (std::move (matches), op,
                                   std::move (separator), t->get_locus ()));
}

/* Parses a visibility syntactical production (i.e. creating a non-default
 * visibility).
 *
 * Without a leading PUB token a private visibility is returned and nothing is
 * consumed.  PUB without '(' gives a public visibility.  Inside the
 * parentheses CRATE, SELF, SUPER and IN (followed by a simple path) are
 * accepted; the result of skipping the closing ')' is not checked.  An empty
 * "in" path or any other token records an error and returns an error
 * visibility.  */
template AST::Visibility Parser::parse_visibility ()
{
  // check for no visibility
  if (lexer.peek_token ()->get_id () != PUB)
    {
      return AST::Visibility::create_private ();
    }

  lexer.skip_token ();

  // create simple pub visibility if no parentheses
  if (lexer.peek_token ()->get_id () != LEFT_PAREN)
    {
      return AST::Visibility::create_public ();
      // or whatever
    }

  lexer.skip_token ();

  const_TokenPtr t = lexer.peek_token ();
  auto path_loc = t->get_locus ();

  switch (t->get_id ())
    {
    case CRATE:
      lexer.skip_token ();

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_crate (path_loc);
    case SELF:
      lexer.skip_token ();

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_self (path_loc);
    case SUPER:
      lexer.skip_token ();

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_super (path_loc);
      case IN: {
        lexer.skip_token ();

        // parse the "in" path as well
        AST::SimplePath path = parse_simple_path ();
        if (path.is_empty ())
          {
            Error error (lexer.peek_token ()->get_locus (),
                         "missing path in pub(in path) visibility");
            add_error (std::move (error));

            // skip after somewhere?
            return AST::Visibility::create_error ();
          }

        skip_token (RIGHT_PAREN);

        return AST::Visibility::create_in_path (std::move (path));
      }
    default:
      add_error (Error (t->get_locus (), "unexpected token %qs in visibility",
                        t->get_token_description ()));

      lexer.skip_token ();
      return AST::Visibility::create_error ();
    }
}

// Parses a module - either a bodied module or a module defined in another file.
/* NOTE(review): angle-bracket text (template parameter lists, template
 * arguments and some %<...%> diagnostic quoting) looks stripped from this copy
 * of the file, e.g. "std::vector> items" below.  Those tokens and the
 * diagnostic strings are kept exactly as found and need restoring before this
 * can compile.  */

/* Parses a module item: "mod NAME ;" (module defined in another file) or
 * "mod NAME { inner-attributes items }".
 *
 * Returns nullptr if the name is missing, the token after the name is neither
 * ';' nor '{', an item in the body fails to parse, or the closing '}' is
 * missing.  */
template std::unique_ptr Parser::parse_module (AST::Visibility vis, AST::AttrVec outer_attrs)
{
  Location locus = lexer.peek_token ()->get_locus ();
  skip_token (MOD);

  const_TokenPtr name_tok = expect_token (IDENTIFIER);
  if (name_tok == nullptr)
    return nullptr;
  Identifier name = name_tok->get_str ();

  const_TokenPtr after_name = lexer.peek_token ();
  const TokenId after_name_id = after_name->get_id ();

  // "mod name;" - the module body lives elsewhere.
  if (after_name_id == SEMICOLON)
    {
      lexer.skip_token ();

      return std::unique_ptr (
        new AST::Module (std::move (name), std::move (vis),
                         std::move (outer_attrs), locus, lexer.get_filename (),
                         inline_module_stack));
    }

  // Anything other than '{' at this point is malformed.
  if (after_name_id != LEFT_CURLY)
    {
      add_error (
        Error (after_name->get_locus (),
               "unexpected token %qs in module declaration/definition item",
               after_name->get_token_description ()));

      lexer.skip_token ();
      return nullptr;
    }

  // "mod name { ... }" - consume '{', then inner attributes and items.
  lexer.skip_token ();

  AST::AttrVec inner_attrs = parse_inner_attributes ();

  std::string module_path_name
    = extract_module_path (inner_attrs, outer_attrs, name);
  InlineModuleStackScope scope (*this, std::move (module_path_name));

  std::vector> items;
  for (const_TokenPtr tok = lexer.peek_token ();
       tok->get_id () != RIGHT_CURLY; tok = lexer.peek_token ())
    {
      std::unique_ptr item = parse_item (false);
      if (item == nullptr)
        {
          // Reported at the token the failed item started on.
          add_error (
            Error (tok->get_locus (), "failed to parse item in module"));
          return nullptr;
        }

      items.push_back (std::move (item));
    }

  if (!skip_token (RIGHT_CURLY))
    {
      // skip somewhere?
      return nullptr;
    }

  return std::unique_ptr (
    new AST::Module (std::move (name), locus, std::move (items),
                     std::move (vis), std::move (inner_attrs),
                     std::move (outer_attrs))); // module name?
}

/* Parses an extern crate declaration (dependency on external crate):
 * "extern crate NAME-or-self [as NAME-or-_] ;".
 *
 * On any malformed input skip_after_semicolon () is called and nullptr is
 * returned.  */
template std::unique_ptr Parser::parse_extern_crate (AST::Visibility vis, AST::AttrVec outer_attrs)
{
  Location locus = lexer.peek_token ()->get_locus ();

  // Both keywords are required, in this order.
  if (!skip_token (EXTERN_TOK) || !skip_token (CRATE))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  /* parse crate reference name - this has its own syntactical rule in reference
   * but seems to not be used elsewhere, so i'm putting it here */
  const_TokenPtr crate_tok = lexer.peek_token ();
  std::string crate_name;

  if (crate_tok->get_id () == IDENTIFIER)
    {
      crate_name = crate_tok->get_str ();
    }
  else if (crate_tok->get_id () == SELF)
    {
      crate_name = "self";
    }
  else
    {
      add_error (
        Error (crate_tok->get_locus (),
               "expecting crate name (identifier or %), found %qs",
               crate_tok->get_token_description ()));

      skip_after_semicolon ();
      return nullptr;
    }
  lexer.skip_token ();

  // don't parse as clause if it doesn't exist
  if (lexer.peek_token ()->get_id () == SEMICOLON)
    {
      lexer.skip_token ();

      return std::unique_ptr (
        new AST::ExternCrate (std::move (crate_name), std::move (vis),
                              std::move (outer_attrs), locus));
    }

  /* parse as clause - this also has its own syntactical rule in reference and
   * also seems to not be used elsewhere, so including here again. */
  if (!skip_token (AS))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  const_TokenPtr alias_tok = lexer.peek_token ();
  std::string as_name;

  if (alias_tok->get_id () == IDENTIFIER)
    {
      as_name = alias_tok->get_str ();
    }
  else if (alias_tok->get_id () == UNDERSCORE)
    {
      as_name = "_";
    }
  else
    {
      add_error (
        Error (alias_tok->get_locus (),
               "expecting as clause name (identifier or %<_%>), found %qs",
               alias_tok->get_token_description ()));

      skip_after_semicolon ();
      return nullptr;
    }
  lexer.skip_token ();

  if (!skip_token (SEMICOLON))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  return std::unique_ptr (
    new AST::ExternCrate (std::move (crate_name), std::move (vis),
                          std::move (outer_attrs), locus, std::move (as_name)));
}

/* Parses a use declaration: "use USE-TREE ;".
 *
 * On a missing keyword, unparsable use tree or missing semicolon,
 * skip_after_semicolon () is called and nullptr is returned.  */
template std::unique_ptr Parser::parse_use_decl (AST::Visibility vis, AST::AttrVec outer_attrs)
{
  Location locus = lexer.peek_token ()->get_locus ();

  if (!skip_token (USE))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  // The use tree is mandatory.
  std::unique_ptr use_tree = parse_use_tree ();
  if (use_tree == nullptr)
    {
      add_error (Error (lexer.peek_token ()->get_locus (),
                        "could not parse use tree in use declaration"));

      skip_after_semicolon ();
      return nullptr;
    }

  if (!skip_token (SEMICOLON))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  return std::unique_ptr (
    new AST::UseDeclaration (std::move (use_tree), std::move (vis),
                             std::move (outer_attrs), locus));
}

// Parses a use tree (which can be recursive and is actually a base class).
/* Parses a use tree.
 *
 * A simple path is attempted first.  Without a path the tree must be a glob
 * or a nested list, optionally preceded by '::'.  With a path the next token
 * selects a glob, a nested list, an "as" rebind, or a plain path (when
 * followed by ';', ',' or '}', none of which is consumed here).
 *
 * Returns nullptr on error.  Nested lists are parsed by recursive calls.
 *
 * NOTE(review): angle-bracket text (template parameter list, template
 * arguments, some %<...%> diagnostic quoting) looks stripped from this copy of
 * the file, e.g. "std::vector> use_trees" below.  The code tokens are kept
 * exactly as found and need restoring before this can compile.  */
template std::unique_ptr Parser::parse_use_tree ()
{
  /* potential syntax definitions in attempt to get algorithm:
   *    Glob:
   *        <- SimplePath :: *
   *        <- :: *
   *        <- *
   *    Nested tree thing:
   *        <- SimplePath :: { COMPLICATED_INNER_TREE_THING }
   *        <- :: COMPLICATED_INNER_TREE_THING }
   *        <- { COMPLICATED_INNER_TREE_THING }
   *    Rebind thing:
   *        <- SimplePath as IDENTIFIER
   *        <- SimplePath as _
   *        <- SimplePath
   */

  /* current plan of attack: try to parse SimplePath first - if fails, one of
   * top two then try parse :: - if fails, one of top two. Next is deciding
   * character for top two. */

  /* Thus, parsing smaller parts of use tree may require feeding into function
   * via parameters (or could handle all in this single function because other
   * use tree types aren't recognised as separate in the spec) */

  // TODO: I think this function is too complex, probably should split it

  Location locus = lexer.peek_token ()->get_locus ();

  // bool has_path = false;
  AST::SimplePath path = parse_simple_path ();

  if (path.is_empty ())
    {
      // has no path, so must be glob or nested tree UseTree type

      bool is_global = false;

      // check for global scope resolution operator
      if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
        {
          lexer.skip_token ();
          is_global = true;
        }

      const_TokenPtr t = lexer.peek_token ();
      switch (t->get_id ())
        {
        case ASTERISK:
          // glob UseTree type
          lexer.skip_token ();

          if (is_global)
            return std::unique_ptr (
              new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL,
                                    AST::SimplePath::create_empty (), locus));
          else
            return std::unique_ptr (
              new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH,
                                    AST::SimplePath::create_empty (), locus));
          case LEFT_CURLY: {
            // nested tree UseTree type
            lexer.skip_token ();

            std::vector> use_trees;

            // Note: this t shadows the outer t for the rest of this case.
            const_TokenPtr t = lexer.peek_token ();
            while (t->get_id () != RIGHT_CURLY)
              {
                std::unique_ptr use_tree = parse_use_tree ();
                if (use_tree == nullptr)
                  {
                    break;
                  }

                use_trees.push_back (std::move (use_tree));

                // Entries are comma-separated; stop at the first non-comma.
                if (lexer.peek_token ()->get_id () != COMMA)
                  break;

                lexer.skip_token ();
                t = lexer.peek_token ();
              }

            // skip end curly delimiter
            if (!skip_token (RIGHT_CURLY))
              {
                // skip after somewhere?
                return nullptr;
              }

            if (is_global)
              return std::unique_ptr (
                new AST::UseTreeList (AST::UseTreeList::GLOBAL,
                                      AST::SimplePath::create_empty (),
                                      std::move (use_trees), locus));
            else
              return std::unique_ptr (
                new AST::UseTreeList (AST::UseTreeList::NO_PATH,
                                      AST::SimplePath::create_empty (),
                                      std::move (use_trees), locus));
          }
        case AS:
          // this is not allowed
          add_error (Error (
            t->get_locus (),
            "use declaration with rebind % requires a valid simple path - "
            "none found"));

          skip_after_semicolon ();
          return nullptr;
        default:
          add_error (Error (t->get_locus (),
                            "unexpected token %qs in use tree with "
                            "no valid simple path (i.e. list"
                            " or glob use tree)",
                            t->get_token_description ()));

          skip_after_semicolon ();
          return nullptr;
        }
    }
  else
    {
      /* Due to aforementioned implementation issues, the trailing :: token is
       * consumed by the path, so it can not be used as a disambiguator.
       * NOPE, not true anymore - TODO what are the consequences of this? */

      const_TokenPtr t = lexer.peek_token ();
      switch (t->get_id ())
        {
        case ASTERISK:
          // glob UseTree type
          lexer.skip_token ();

          return std::unique_ptr (
            new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
                                  std::move (path), locus));
          case LEFT_CURLY: {
            // nested tree UseTree type
            lexer.skip_token ();

            std::vector> use_trees;

            // TODO: think of better control structure
            // Note: this t shadows the outer t for the rest of this case.
            const_TokenPtr t = lexer.peek_token ();
            while (t->get_id () != RIGHT_CURLY)
              {
                std::unique_ptr use_tree = parse_use_tree ();
                if (use_tree == nullptr)
                  {
                    break;
                  }

                use_trees.push_back (std::move (use_tree));

                // Entries are comma-separated; stop at the first non-comma.
                if (lexer.peek_token ()->get_id () != COMMA)
                  break;

                lexer.skip_token ();
                t = lexer.peek_token ();
              }

            // skip end curly delimiter
            if (!skip_token (RIGHT_CURLY))
              {
                // skip after somewhere?
                return nullptr;
              }

            return std::unique_ptr (
              new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
                                    std::move (path), std::move (use_trees),
                                    locus));
          }
          case AS: {
            // rebind UseTree type
            lexer.skip_token ();

            // Note: this t shadows the outer t for the rest of this case.
            const_TokenPtr t = lexer.peek_token ();
            switch (t->get_id ())
              {
              case IDENTIFIER:
                // skip lexer token
                lexer.skip_token ();

                return std::unique_ptr (
                  new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER,
                                          std::move (path), locus,
                                          t->get_str ()));
              case UNDERSCORE:
                // skip lexer token
                lexer.skip_token ();

                return std::unique_ptr (
                  new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD,
                                          std::move (path), locus, "_"));
              default:
                add_error (Error (
                  t->get_locus (),
                  "unexpected token %qs in use tree with as clause - expected "
                  "identifier or %<_%>",
                  t->get_token_description ()));

                skip_after_semicolon ();
                return nullptr;
              }
          }
        case SEMICOLON:
          // rebind UseTree type without rebinding - path only

          // don't skip semicolon - the use declaration parser
          // (parse_use_decl) consumes it
          // lexer.skip_token();

          return std::unique_ptr (
            new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
                                    locus));
        case COMMA:
        case RIGHT_CURLY:
          // this may occur in recursive calls - assume it is ok and ignore it
          return std::unique_ptr (
            new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
                                    locus));
        default:
          add_error (Error (t->get_locus (),
                            "unexpected token %qs in use tree with valid path",
                            t->get_token_description ()));

          // skip_after_semicolon();
          return nullptr;
        }
    }
}

// Parses a function (not a method).
template std::unique_ptr Parser::parse_function (AST::Visibility vis, AST::AttrVec outer_attrs) { Location locus = lexer.peek_token ()->get_locus (); // Get qualifiers for function if they exist AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); skip_token (FN_TOK); // Save function name token const_TokenPtr function_name_tok = expect_token (IDENTIFIER); if (function_name_tok == nullptr) { skip_after_next_block (); return nullptr; } Identifier function_name = function_name_tok->get_str (); // parse generic params - if exist std::vector> generic_params = parse_generic_params_in_angles (); if (!skip_token (LEFT_PAREN)) { Error error (lexer.peek_token ()->get_locus (), "function declaration missing opening parentheses before " "parameter list"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse function parameters (only if next token isn't right paren) std::vector function_params; if (lexer.peek_token ()->get_id () != RIGHT_PAREN) function_params = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); if (!skip_token (RIGHT_PAREN)) { Error error (lexer.peek_token ()->get_locus (), "function declaration missing closing parentheses after " "parameter list"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse function return type - if exists std::unique_ptr return_type = parse_function_return_type (); // parse where clause - if exists AST::WhereClause where_clause = parse_where_clause (); // parse block expression std::unique_ptr block_expr = parse_block_expr (); return std::unique_ptr ( new AST::Function (std::move (function_name), std::move (qualifiers), std::move (generic_params), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (block_expr), std::move (vis), std::move (outer_attrs), locus)); } // Parses function or method qualifiers (i.e. const, unsafe, and extern). 
template AST::FunctionQualifiers Parser::parse_function_qualifiers () { AsyncConstStatus const_status = NONE; bool has_unsafe = false; bool has_extern = false; std::string abi; // Check in order of const, unsafe, then extern const_TokenPtr t = lexer.peek_token (); Location locus = t->get_locus (); switch (t->get_id ()) { case CONST: lexer.skip_token (); const_status = CONST_FN; break; case ASYNC: lexer.skip_token (); const_status = ASYNC_FN; break; default: // const status is still none break; } if (lexer.peek_token ()->get_id () == UNSAFE) { lexer.skip_token (); has_unsafe = true; } if (lexer.peek_token ()->get_id () == EXTERN_TOK) { lexer.skip_token (); has_extern = true; // detect optional abi name const_TokenPtr next_tok = lexer.peek_token (); if (next_tok->get_id () == STRING_LITERAL) { lexer.skip_token (); abi = next_tok->get_str (); } } return AST::FunctionQualifiers (locus, const_status, has_unsafe, has_extern, std::move (abi)); } // Parses generic (lifetime or type) params inside angle brackets (optional). template std::vector> Parser::parse_generic_params_in_angles () { if (lexer.peek_token ()->get_id () != LEFT_ANGLE) { // seems to be no generic params, so exit with empty vector return std::vector> (); } lexer.skip_token (); // DEBUG: rust_debug ("skipped left angle in generic param"); std::vector> generic_params = parse_generic_params (is_right_angle_tok); // DEBUG: rust_debug ("finished parsing actual generic params (i.e. 
inside angles)"); if (!skip_generics_right_angle ()) { // DEBUG rust_debug ("failed to skip generics right angle - returning empty " "generic params"); return std::vector> (); } return generic_params; } template template std::unique_ptr Parser::parse_generic_param (EndTokenPred is_end_token) { auto token = lexer.peek_token (); auto outer_attrs = parse_outer_attribute (); std::unique_ptr param; switch (token->get_id ()) { case LIFETIME: { auto lifetime = parse_lifetime (); if (lifetime.is_error ()) { rust_error_at ( token->get_locus (), "failed to parse lifetime in generic parameter list"); return nullptr; } std::vector lifetime_bounds; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse required bounds lifetime_bounds = parse_lifetime_bounds ([is_end_token] (TokenId id) { return is_end_token (id) || id == COMMA; }); } param = std::unique_ptr (new AST::LifetimeParam ( std::move (lifetime), std::move (lifetime_bounds), std::move (outer_attrs), token->get_locus ())); break; } case IDENTIFIER: { auto type_ident = token->get_str (); lexer.skip_token (); std::vector> type_param_bounds; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse optional type param bounds type_param_bounds = parse_type_param_bounds (); } std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); // parse required type type = parse_type (); if (!type) { rust_error_at ( lexer.peek_token ()->get_locus (), "failed to parse type in type param in generic params"); return nullptr; } } param = std::unique_ptr ( new AST::TypeParam (std::move (type_ident), token->get_locus (), std::move (type_param_bounds), std::move (type), std::move (outer_attrs))); break; } case CONST: { lexer.skip_token (); auto name_token = expect_token (IDENTIFIER); if (!name_token || !expect_token (COLON)) return nullptr; auto type = parse_type (); if (!type) return nullptr; // optional default value auto default_expr = 
AST::GenericArg::create_error (); if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); auto tok = lexer.peek_token (); default_expr = parse_generic_arg (); if (default_expr.is_error ()) rust_error_at (tok->get_locus (), "invalid token for start of default value for " "const generic parameter: expected %, " "% or %, got %qs", token_id_to_str (tok->get_id ())); // At this point, we *know* that we are parsing a const // expression if (default_expr.get_kind () == AST::GenericArg::Kind::Either) default_expr = default_expr.disambiguate_to_const (); } param = std::unique_ptr ( new AST::ConstGenericParam (name_token->get_str (), std::move (type), default_expr, std::move (outer_attrs), token->get_locus ())); break; } default: // FIXME: Can we clean this last call with a method call? rust_error_at (token->get_locus (), "unexpected token when parsing generic parameters: %qs", token->get_str ().c_str ()); return nullptr; } return param; } /* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost * always parse_generic_params_in_angles is what is wanted. 
*/ template template std::vector> Parser::parse_generic_params (EndTokenPred is_end_token) { std::vector> generic_params; /* can't parse lifetime and type params separately due to lookahead issues * thus, parse them all here */ /* HACK: used to retain attribute data if a lifetime param is tentatively * parsed but it turns out to be type param */ AST::Attribute parsed_outer_attr = AST::Attribute::create_empty (); // Did we parse a generic type param yet auto type_seen = false; // Did the user write a lifetime parameter after a type one auto order_error = false; // parse lifetime params while (!is_end_token (lexer.peek_token ()->get_id ())) { auto param = parse_generic_param (is_end_token); if (param) { // TODO: Handle `Const` here as well if necessary if (param->get_kind () == AST::GenericParam::Kind::Type) type_seen = true; else if (param->get_kind () == AST::GenericParam::Kind::Lifetime && type_seen) order_error = true; generic_params.emplace_back (std::move (param)); maybe_skip_token (COMMA); } } // FIXME: Add reordering hint if (order_error) rust_error_at (generic_params.front ()->get_locus (), "invalid order for generic parameters: lifetimes should " "always come before types"); generic_params.shrink_to_fit (); return generic_params; } /* Parses lifetime generic parameters (pointers). Will also consume any * trailing comma. No extra checks for end token. 
*/ template std::vector> Parser::parse_lifetime_params () { std::vector> lifetime_params; while (lexer.peek_token ()->get_id () != END_OF_FILE) { AST::LifetimeParam lifetime_param = parse_lifetime_param (); if (lifetime_param.is_error ()) { // can't treat as error as only way to get out with trailing comma break; } lifetime_params.push_back (std::unique_ptr ( new AST::LifetimeParam (std::move (lifetime_param)))); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses lifetime generic parameters (pointers). Will also consume any * trailing comma. Has extra is_end_token predicate checking. */ template template std::vector> Parser::parse_lifetime_params (EndTokenPred is_end_token) { std::vector> lifetime_params; // if end_token is not specified, it defaults to EOF, so should work fine while (!is_end_token (lexer.peek_token ()->get_id ())) { AST::LifetimeParam lifetime_param = parse_lifetime_param (); if (lifetime_param.is_error ()) { /* TODO: is it worth throwing away all lifetime params just because * one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime param in lifetime params"); add_error (std::move (error)); return {}; } lifetime_params.push_back (std::unique_ptr ( new AST::LifetimeParam (std::move (lifetime_param)))); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses lifetime generic parameters (objects). Will also consume any * trailing comma. No extra checks for end token. * TODO: is this best solution? implements most of the same algorithm. 
*/ template std::vector Parser::parse_lifetime_params_objs () { std::vector lifetime_params; // bad control structure as end token cannot be guaranteed while (true) { AST::LifetimeParam lifetime_param = parse_lifetime_param (); if (lifetime_param.is_error ()) { // not an error as only way to exit if trailing comma break; } lifetime_params.push_back (std::move (lifetime_param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses lifetime generic parameters (objects). Will also consume any * trailing comma. Has extra is_end_token predicate checking. * TODO: is this best solution? implements most of the same algorithm. */ template template std::vector Parser::parse_lifetime_params_objs ( EndTokenPred is_end_token) { std::vector lifetime_params; while (!is_end_token (lexer.peek_token ()->get_id ())) { AST::LifetimeParam lifetime_param = parse_lifetime_param (); if (lifetime_param.is_error ()) { /* TODO: is it worth throwing away all lifetime params just because * one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime param in lifetime params"); add_error (std::move (error)); return {}; } lifetime_params.push_back (std::move (lifetime_param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } lifetime_params.shrink_to_fit (); return lifetime_params; } /* Parses a sequence of a certain grammar rule in object form (not pointer or * smart pointer), delimited by commas and ending when 'is_end_token' is * satisfied (templated). Will also consume any trailing comma. * FIXME: this cannot be used due to member function pointer problems (i.e. 
* parsing_function cannot be specified properly) */ template template auto Parser::parse_non_ptr_sequence ( ParseFunction parsing_function, EndTokenPred is_end_token, std::string error_msg) -> std::vector { std::vector params; while (!is_end_token (lexer.peek_token ()->get_id ())) { auto param = parsing_function (); if (param.is_error ()) { // TODO: is it worth throwing away all params just because one // failed? Error error (lexer.peek_token ()->get_locus (), std::move (error_msg)); add_error (std::move (error)); return {}; } params.push_back (std::move (param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } params.shrink_to_fit (); return params; } /* Parses a single lifetime generic parameter (not including comma). */ template AST::LifetimeParam Parser::parse_lifetime_param () { // parse outer attribute, which is optional and may not exist AST::Attribute outer_attr = parse_outer_attribute (); // save lifetime token - required const_TokenPtr lifetime_tok = lexer.peek_token (); if (lifetime_tok->get_id () != LIFETIME) { // if lifetime is missing, must not be a lifetime param, so return null return AST::LifetimeParam::create_error (); } lexer.skip_token (); /* TODO: does this always create a named lifetime? or can a different type * be made? */ AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (), lifetime_tok->get_locus ()); // parse lifetime bounds, if it exists std::vector lifetime_bounds; if (lexer.peek_token ()->get_id () == COLON) { // parse lifetime bounds lifetime_bounds = parse_lifetime_bounds (); // TODO: have end token passed in? } return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds), std::move (outer_attr), lifetime_tok->get_locus ()); } // Parses type generic parameters. Will also consume any trailing comma. 
template std::vector> Parser::parse_type_params () { std::vector> type_params; // infinite loop with break on failure as no info on ending token while (true) { std::unique_ptr type_param = parse_type_param (); if (type_param == nullptr) { // break if fails to parse break; } type_params.push_back (std::move (type_param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } type_params.shrink_to_fit (); return type_params; } // Parses type generic parameters. Will also consume any trailing comma. template template std::vector> Parser::parse_type_params (EndTokenPred is_end_token) { std::vector> type_params; while (!is_end_token (lexer.peek_token ()->get_id ())) { std::unique_ptr type_param = parse_type_param (); if (type_param == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type param in type params"); add_error (std::move (error)); return {}; } type_params.push_back (std::move (type_param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip commas, including trailing commas lexer.skip_token (); } type_params.shrink_to_fit (); return type_params; /* TODO: this shares most code with parse_lifetime_params - good place to * use template (i.e. parse_non_ptr_sequence if doable) */ } /* Parses a single type (generic) parameter, not including commas. May change * to return value. 
*/ template std::unique_ptr Parser::parse_type_param () { // parse outer attribute, which is optional and may not exist AST::Attribute outer_attr = parse_outer_attribute (); const_TokenPtr identifier_tok = lexer.peek_token (); if (identifier_tok->get_id () != IDENTIFIER) { // return null as type param can't exist without this required // identifier return nullptr; } // TODO: create identifier from identifier token Identifier ident = identifier_tok->get_str (); lexer.skip_token (); // parse type param bounds (if they exist) std::vector> type_param_bounds; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // parse type param bounds, which may or may not exist type_param_bounds = parse_type_param_bounds (); } // parse type (if it exists) std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); // parse type (now required) type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in type param"); add_error (std::move (error)); return nullptr; } } return std::unique_ptr ( new AST::TypeParam (std::move (ident), identifier_tok->get_locus (), std::move (type_param_bounds), std::move (type), std::move (outer_attr))); } /* Parses regular (i.e. non-generic) parameters in functions or methods. Also * has end token handling. */ template template std::vector Parser::parse_function_params (EndTokenPred is_end_token) { std::vector params; if (is_end_token (lexer.peek_token ()->get_id ())) return params; AST::FunctionParam initial_param = parse_function_param (); // Return empty parameter list if no parameter there if (initial_param.is_error ()) { // TODO: is this an error? return params; } params.push_back (std::move (initial_param)); // maybe think of a better control structure here - do-while with an initial // error state? 
basically, loop through parameter list until can't find any // more params const_TokenPtr t = lexer.peek_token (); while (t->get_id () == COMMA) { // skip comma if applies lexer.skip_token (); // TODO: strictly speaking, shouldn't there be no trailing comma? if (is_end_token (lexer.peek_token ()->get_id ())) break; // now, as right paren would break, function param is required AST::FunctionParam param = parse_function_param (); if (param.is_error ()) { Error error (lexer.peek_token ()->get_locus (), "failed to parse function param (in function params)"); add_error (std::move (error)); // skip somewhere? return std::vector (); } params.push_back (std::move (param)); t = lexer.peek_token (); } params.shrink_to_fit (); return params; } /* Parses a single regular (i.e. non-generic) parameter in a function or * method, i.e. the "name: type" bit. Also handles it not existing. */ template AST::FunctionParam Parser::parse_function_param () { // parse outer attributes if they exist AST::AttrVec outer_attrs = parse_outer_attributes (); // TODO: should saved location be at start of outer attributes or pattern? Location locus = lexer.peek_token ()->get_locus (); std::unique_ptr param_pattern = parse_pattern (); // create error function param if it doesn't exist if (param_pattern == nullptr) { // skip after something return AST::FunctionParam::create_error (); } if (!skip_token (COLON)) { // skip after something return AST::FunctionParam::create_error (); } std::unique_ptr param_type = parse_type (); if (param_type == nullptr) { // skip? return AST::FunctionParam::create_error (); } return AST::FunctionParam (std::move (param_pattern), std::move (param_type), std::move (outer_attrs), locus); } /* Parses a function or method return type syntactical construction. Also * handles a function return type not existing. 
*/ template std::unique_ptr Parser::parse_function_return_type () { if (lexer.peek_token ()->get_id () != RETURN_TYPE) return nullptr; // skip return type, as it now obviously exists lexer.skip_token (); std::unique_ptr type = parse_type (); return type; } /* Parses a "where clause" (in a function, struct, method, etc.). Also handles * a where clause not existing, in which it will return * WhereClause::create_empty(), which can be checked via * WhereClause::is_empty(). */ template AST::WhereClause Parser::parse_where_clause () { const_TokenPtr where_tok = lexer.peek_token (); if (where_tok->get_id () != WHERE) { // where clause doesn't exist, so create empty one return AST::WhereClause::create_empty (); } lexer.skip_token (); /* parse where clause items - this is not a separate rule in the reference * so won't be here */ std::vector> where_clause_items; /* HACK: where clauses end with a right curly or semicolon or equals in all * uses currently */ const_TokenPtr t = lexer.peek_token (); while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON && t->get_id () != EQUAL) { std::unique_ptr where_clause_item = parse_where_clause_item (); if (where_clause_item == nullptr) { Error error (t->get_locus (), "failed to parse where clause item"); add_error (std::move (error)); return AST::WhereClause::create_empty (); } where_clause_items.push_back (std::move (where_clause_item)); // also skip comma if it exists if (lexer.peek_token ()->get_id () != COMMA) break; lexer.skip_token (); t = lexer.peek_token (); } where_clause_items.shrink_to_fit (); return AST::WhereClause (std::move (where_clause_items)); } /* Parses a where clause item (lifetime or type bound). Does not parse any * commas. 
*/ template std::unique_ptr Parser::parse_where_clause_item () { // shitty cheat way of determining lifetime or type bound - test for // lifetime const_TokenPtr t = lexer.peek_token (); if (t->get_id () == LIFETIME) return parse_lifetime_where_clause_item (); else return parse_type_bound_where_clause_item (); } // Parses a lifetime where clause item. template std::unique_ptr Parser::parse_lifetime_where_clause_item () { AST::Lifetime lifetime = parse_lifetime (); if (lifetime.is_error ()) { // TODO: error here? return nullptr; } if (!skip_token (COLON)) { // TODO: skip after somewhere return nullptr; } std::vector lifetime_bounds = parse_lifetime_bounds (); // TODO: have end token passed in? Location locus = lifetime.get_locus (); return std::unique_ptr ( new AST::LifetimeWhereClauseItem (std::move (lifetime), std::move (lifetime_bounds), locus)); } // Parses a type bound where clause item. template std::unique_ptr Parser::parse_type_bound_where_clause_item () { // parse for lifetimes, if it exists std::vector for_lifetimes; if (lexer.peek_token ()->get_id () == FOR) for_lifetimes = parse_for_lifetimes (); std::unique_ptr type = parse_type (); if (type == nullptr) { return nullptr; } if (!skip_token (COLON)) { // TODO: skip after somewhere return nullptr; } // parse type param bounds if they exist std::vector> type_param_bounds = parse_type_param_bounds (); Location locus = lexer.peek_token ()->get_locus (); return std::unique_ptr ( new AST::TypeBoundWhereClauseItem (std::move (for_lifetimes), std::move (type), std::move (type_param_bounds), locus)); } // Parses a for lifetimes clause, including the for keyword and angle // brackets. template std::vector Parser::parse_for_lifetimes () { std::vector params; if (!skip_token (FOR)) { // skip after somewhere? return params; } if (!skip_token (LEFT_ANGLE)) { // skip after somewhere? 
return params; } /* cannot specify end token due to parsing problems with '>' tokens being * nested */ params = parse_lifetime_params_objs (is_right_angle_tok); if (!skip_generics_right_angle ()) { // DEBUG rust_debug ("failed to skip generics right angle after (supposedly) " "finished parsing where clause items"); // ok, well this gets called. // skip after somewhere? return params; } return params; } // Parses type parameter bounds in where clause or generic arguments. template std::vector> Parser::parse_type_param_bounds () { std::vector> type_param_bounds; std::unique_ptr initial_bound = parse_type_param_bound (); // quick exit if null if (initial_bound == nullptr) { /* error? type param bounds must have at least one term, but are bounds * optional? */ return type_param_bounds; } type_param_bounds.push_back (std::move (initial_bound)); while (lexer.peek_token ()->get_id () == PLUS) { lexer.skip_token (); std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { /* not an error: bound is allowed to be null as trailing plus is * allowed */ return type_param_bounds; } type_param_bounds.push_back (std::move (bound)); } type_param_bounds.shrink_to_fit (); return type_param_bounds; } /* Parses type parameter bounds in where clause or generic arguments, with end * token handling. */ template template std::vector> Parser::parse_type_param_bounds (EndTokenPred is_end_token) { std::vector> type_param_bounds; std::unique_ptr initial_bound = parse_type_param_bound (); // quick exit if null if (initial_bound == nullptr) { /* error? type param bounds must have at least one term, but are bounds * optional? 
*/ return type_param_bounds; } type_param_bounds.push_back (std::move (initial_bound)); while (lexer.peek_token ()->get_id () == PLUS) { lexer.skip_token (); // break if end token character if (is_end_token (lexer.peek_token ()->get_id ())) break; std::unique_ptr bound = parse_type_param_bound (); if (bound == nullptr) { // TODO how wise is it to ditch all bounds if only one failed? Error error (lexer.peek_token ()->get_locus (), "failed to parse type param bound in type param bounds"); add_error (std::move (error)); return {}; } type_param_bounds.push_back (std::move (bound)); } type_param_bounds.shrink_to_fit (); return type_param_bounds; } /* Parses a single type parameter bound in a where clause or generic argument. * Does not parse the '+' between arguments. */ template std::unique_ptr Parser::parse_type_param_bound () { // shitty cheat way of determining lifetime or trait bound - test for // lifetime const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LIFETIME: return std::unique_ptr ( new AST::Lifetime (parse_lifetime ())); case LEFT_PAREN: case QUESTION_MARK: case FOR: case IDENTIFIER: case SUPER: case SELF: case SELF_ALIAS: case CRATE: case DOLLAR_SIGN: return parse_trait_bound (); default: // don't error - assume this is fine TODO return nullptr; } } // Parses a trait bound type param bound. 
template std::unique_ptr Parser::parse_trait_bound () { bool has_parens = false; bool has_question_mark = false; Location locus = lexer.peek_token ()->get_locus (); // handle trait bound being in parentheses if (lexer.peek_token ()->get_id () == LEFT_PAREN) { has_parens = true; lexer.skip_token (); } // handle having question mark (optional) if (lexer.peek_token ()->get_id () == QUESTION_MARK) { has_question_mark = true; lexer.skip_token (); } /* parse for lifetimes, if it exists (although empty for lifetimes is ok to * handle this) */ std::vector for_lifetimes; if (lexer.peek_token ()->get_id () == FOR) for_lifetimes = parse_for_lifetimes (); // handle TypePath AST::TypePath type_path = parse_type_path (); // handle closing parentheses if (has_parens) { if (!skip_token (RIGHT_PAREN)) { return nullptr; } } return std::unique_ptr ( new AST::TraitBound (std::move (type_path), locus, has_parens, has_question_mark, std::move (for_lifetimes))); } // Parses lifetime bounds. template std::vector Parser::parse_lifetime_bounds () { std::vector lifetime_bounds; while (true) { AST::Lifetime lifetime = parse_lifetime (); // quick exit for parsing failure if (lifetime.is_error ()) break; lifetime_bounds.push_back (std::move (lifetime)); /* plus is maybe not allowed at end - spec defines it weirdly, so * assuming allowed at end */ if (lexer.peek_token ()->get_id () != PLUS) break; lexer.skip_token (); } lifetime_bounds.shrink_to_fit (); return lifetime_bounds; } // Parses lifetime bounds, with added check for ending token. template template std::vector Parser::parse_lifetime_bounds (EndTokenPred is_end_token) { std::vector lifetime_bounds; while (!is_end_token (lexer.peek_token ()->get_id ())) { AST::Lifetime lifetime = parse_lifetime (); if (lifetime.is_error ()) { /* TODO: is it worth throwing away all lifetime bound info just * because one failed? 
*/ Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime in lifetime bounds"); add_error (std::move (error)); return {}; } lifetime_bounds.push_back (std::move (lifetime)); /* plus is maybe not allowed at end - spec defines it weirdly, so * assuming allowed at end */ if (lexer.peek_token ()->get_id () != PLUS) break; lexer.skip_token (); } lifetime_bounds.shrink_to_fit (); return lifetime_bounds; } /* Parses a lifetime token (named, 'static, or '_). Also handles lifetime not * existing. */ template AST::Lifetime Parser::parse_lifetime () { const_TokenPtr lifetime_tok = lexer.peek_token (); Location locus = lifetime_tok->get_locus (); // create error lifetime if doesn't exist if (lifetime_tok->get_id () != LIFETIME) { return AST::Lifetime::error (); } lexer.skip_token (); std::string lifetime_ident = lifetime_tok->get_str (); if (lifetime_ident == "'static") { return AST::Lifetime (AST::Lifetime::STATIC, "", locus); } else if (lifetime_ident == "'_") { return AST::Lifetime (AST::Lifetime::WILDCARD, "", locus); } else { return AST::Lifetime (AST::Lifetime::NAMED, std::move (lifetime_ident), locus); } } // Parses a "type alias" (typedef) item. 
template std::unique_ptr Parser::parse_type_alias (AST::Visibility vis, AST::AttrVec outer_attrs) { Location locus = lexer.peek_token ()->get_locus (); skip_token (TYPE); // TODO: use this token for identifier when finished that const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); if (alias_name_tok == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse identifier in type alias"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } Identifier alias_name = alias_name_tok->get_str (); // parse generic params, which may not exist std::vector> generic_params = parse_generic_params_in_angles (); // parse where clause, which may not exist AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (EQUAL)) { skip_after_semicolon (); return nullptr; } std::unique_ptr type_to_alias = parse_type (); if (!skip_token (SEMICOLON)) { // should be skipping past this, not the next line return nullptr; } return std::unique_ptr ( new AST::TypeAlias (std::move (alias_name), std::move (generic_params), std::move (where_clause), std::move (type_to_alias), std::move (vis), std::move (outer_attrs), locus)); } // Parse a struct item AST node. template std::unique_ptr Parser::parse_struct (AST::Visibility vis, AST::AttrVec outer_attrs) { /* TODO: determine best way to parse the proper struct vs tuple struct - * share most of initial constructs so lookahead might be impossible, and if * not probably too expensive. Best way is probably unified parsing for the * initial parts and then pass them in as params to more derived functions. * Alternatively, just parse everything in this one function - do this if * function not too long. */ /* Proper struct <- 'struct' IDENTIFIER generic_params? where_clause? ( '{' * struct_fields? '}' | ';' ) */ /* Tuple struct <- 'struct' IDENTIFIER generic_params? '(' tuple_fields? ')' * where_clause? 
';' */
  Location locus = lexer.peek_token ()->get_locus ();
  skip_token (STRUCT_TOK);

  // parse struct name
  const_TokenPtr name_tok = expect_token (IDENTIFIER);
  if (name_tok == nullptr)
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "could not parse struct or tuple struct identifier");
      add_error (std::move (error));

      // skip after somewhere?
      return nullptr;
    }
  Identifier struct_name = name_tok->get_str ();

  // parse generic params, which may or may not exist
  std::vector> generic_params = parse_generic_params_in_angles ();

  // branch on next token - determines whether proper struct or tuple struct
  if (lexer.peek_token ()->get_id () == LEFT_PAREN)
    {
      // tuple struct

      // skip left parenthesis
      lexer.skip_token ();

      // parse tuple fields
      std::vector tuple_fields;
      // Might be empty tuple for unit tuple struct.
      if (lexer.peek_token ()->get_id () == RIGHT_PAREN)
        tuple_fields = std::vector ();
      else
        tuple_fields = parse_tuple_fields ();

      // tuple parameters must have closing parenthesis
      if (!skip_token (RIGHT_PAREN))
        {
          skip_after_semicolon ();
          return nullptr;
        }

      // parse where clause, which is optional
      AST::WhereClause where_clause = parse_where_clause ();

      if (!skip_token (SEMICOLON))
        {
          // can't skip after semicolon because it's meant to be here
          return nullptr;
        }

      return std::unique_ptr (
        new AST::TupleStruct (std::move (tuple_fields), std::move (struct_name),
                              std::move (generic_params),
                              std::move (where_clause), std::move (vis),
                              std::move (outer_attrs), locus));
    }

  // assume it is a proper struct being parsed and continue outside of switch
  // - label only here to suppress warning

  // parse where clause, which is optional
  AST::WhereClause where_clause = parse_where_clause ();

  // branch on next token - determines whether struct is a unit struct
  const_TokenPtr t = lexer.peek_token ();
  switch (t->get_id ())
    {
      case LEFT_CURLY: {
        // struct with body

        // skip curly bracket
        lexer.skip_token ();

        // parse struct fields, if any
        std::vector struct_fields
          = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; });

        if (!skip_token (RIGHT_CURLY))
          {
            // skip somewhere?
            return nullptr;
          }

        return std::unique_ptr (new AST::StructStruct (
          std::move (struct_fields), std::move (struct_name),
          std::move (generic_params), std::move (where_clause), false,
          std::move (vis), std::move (outer_attrs), locus));
      }
    case SEMICOLON:
      // unit struct declaration

      lexer.skip_token ();

      return std::unique_ptr (
        new AST::StructStruct (std::move (struct_name),
                               std::move (generic_params),
                               std::move (where_clause), std::move (vis),
                               std::move (outer_attrs), locus));
    default:
      add_error (Error (t->get_locus (),
                        "unexpected token %qs in struct declaration",
                        t->get_token_description ()));

      // skip somewhere?
      return nullptr;
    }
}

/* Parses a comma-separated list of struct fields in a struct declaration.
 * Returns an empty vector if the first field cannot be parsed. A field that
 * fails to parse after a comma simply ends the list: no error is added here
 * and the fields parsed so far are returned. */
template std::vector Parser::parse_struct_fields ()
{
  std::vector fields;

  AST::StructField initial_field = parse_struct_field ();

  // Return empty field list if no field there
  if (initial_field.is_error ())
    return fields;

  fields.push_back (std::move (initial_field));

  // each further field must be introduced by a comma
  while (lexer.peek_token ()->get_id () == COMMA)
    {
      lexer.skip_token ();

      AST::StructField field = parse_struct_field ();

      if (field.is_error ())
        {
          // would occur with trailing comma, so allowed
          break;
        }

      fields.push_back (std::move (field));
    }

  fields.shrink_to_fit ();
  return fields;
  // TODO: template if possible (parse_non_ptr_seq)
}

/* Parses a comma-separated list of struct fields in a struct declaration.
 * After each comma, is_end_tok is called with the id of the next token; if it
 * returns true the list ends there (i.e. the comma was a trailing one).
 * Returns an empty vector if the first field cannot be parsed. If a later
 * field fails to parse, an error is added and an empty vector is returned,
 * discarding the fields parsed so far. */
template template std::vector
Parser::parse_struct_fields (EndTokenPred is_end_tok)
{
  std::vector fields;

  AST::StructField initial_field = parse_struct_field ();

  // Return empty field list if no field there
  if (initial_field.is_error ())
    return fields;

  fields.push_back (std::move (initial_field));

  while (lexer.peek_token ()->get_id () == COMMA)
    {
      lexer.skip_token ();

      // a comma directly followed by the end token is a trailing comma
      if (is_end_tok (lexer.peek_token ()->get_id ()))
        break;

      AST::StructField field = parse_struct_field ();
      if (field.is_error ())
        {
          /* TODO: should every field be ditched just because one couldn't be
           * parsed? */
          Error error (lexer.peek_token ()->get_locus (),
                       "failed to parse struct field in struct fields");
          add_error (std::move (error));

          return {};
        }

      fields.push_back (std::move (field));
    }

  fields.shrink_to_fit ();
  return fields;
  // TODO: template if possible (parse_non_ptr_seq)
}

/* Parses a single struct field (in a struct definition). Does not parse
 * commas. Returns AST::StructField::create_error () when the token following
 * any outer attributes and visibility is not an identifier (no error is added
 * in that case), when the ':' is missing, or when the field type cannot be
 * parsed. */
template AST::StructField Parser::parse_struct_field ()
{
  // parse outer attributes, if they exist
  AST::AttrVec outer_attrs = parse_outer_attributes ();

  // parse visibility, if it exists
  AST::Visibility vis = parse_visibility ();

  Location locus = lexer.peek_token ()->get_locus ();

  // parse field name
  const_TokenPtr field_name_tok = lexer.peek_token ();
  if (field_name_tok->get_id () != IDENTIFIER)
    {
      // if not identifier, assumes there is no struct field and exits - not
      // necessarily error
      return AST::StructField::create_error ();
    }
  Identifier field_name = field_name_tok->get_str ();
  lexer.skip_token ();

  if (!skip_token (COLON))
    {
      // skip after somewhere?
      return AST::StructField::create_error ();
    }

  // parse field type - this is required
  std::unique_ptr field_type = parse_type ();
  if (field_type == nullptr)
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "could not parse type in struct field definition");
      add_error (std::move (error));

      // skip after somewhere
      return AST::StructField::create_error ();
    }

  return AST::StructField (std::move (field_name), std::move (field_type),
                           std::move (vis), locus, std::move (outer_attrs));
}

/* Parses tuple fields in tuple/tuple struct declarations. Returns an empty
 * vector if the first field cannot be parsed. A comma directly followed by a
 * right parenthesis ends the list. If a later field fails to parse, an error
 * is added and an empty vector is returned. */
template std::vector Parser::parse_tuple_fields ()
{
  std::vector fields;

  AST::TupleField initial_field = parse_tuple_field ();

  // Return empty field list if no field there
  if (initial_field.is_error ())
    {
      return fields;
    }

  fields.push_back (std::move (initial_field));

  // maybe think of a better control structure here - do-while with an initial
  // error state? basically, loop through field list until can't find any more
  // params
  // HACK: all current syntax uses of tuple fields have them ending
  // with a right paren token
  const_TokenPtr t = lexer.peek_token ();
  while (t->get_id () == COMMA)
    {
      // skip comma if applies - e.g. trailing comma
      lexer.skip_token ();

      // break out due to right paren if it exists
      if (lexer.peek_token ()->get_id () == RIGHT_PAREN)
        {
          break;
        }

      AST::TupleField field = parse_tuple_field ();
      if (field.is_error ())
        {
          Error error (lexer.peek_token ()->get_locus (),
                       "failed to parse tuple field in tuple fields");
          add_error (std::move (error));

          return std::vector ();
        }

      fields.push_back (std::move (field));

      t = lexer.peek_token ();
    }

  fields.shrink_to_fit ();
  return fields;

  // TODO: this shares basically all code with function params and struct
  // fields
  // - templates?
}

/* Parses a single tuple struct field in a tuple struct definition. Does not
 * parse commas. Adds an error and returns AST::TupleField::create_error ()
 * if the field type cannot be parsed. */
template AST::TupleField Parser::parse_tuple_field ()
{
  // parse outer attributes if they exist
  AST::AttrVec outer_attrs = parse_outer_attributes ();

  // parse visibility if it exists
  AST::Visibility vis = parse_visibility ();

  Location locus = lexer.peek_token ()->get_locus ();

  // parse type, which is required
  std::unique_ptr field_type = parse_type ();
  if (field_type == nullptr)
    {
      // error if null
      Error error (lexer.peek_token ()->get_locus (),
                   "could not parse type in tuple struct field");
      add_error (std::move (error));

      // skip after something
      return AST::TupleField::create_error ();
    }

  return AST::TupleField (std::move (field_type), std::move (vis), locus,
                          std::move (outer_attrs));
}

// Parses a Rust "enum" tagged union item definition.
template std::unique_ptr Parser::parse_enum (AST::Visibility vis, AST::AttrVec outer_attrs) { Location locus = lexer.peek_token ()->get_locus (); skip_token (ENUM_TOK); // parse enum name const_TokenPtr enum_name_tok = expect_token (IDENTIFIER); if (enum_name_tok == nullptr) return nullptr; Identifier enum_name = enum_name_tok->get_str (); // parse generic params (of enum container, not enum variants) if they exist std::vector> generic_params = parse_generic_params_in_angles (); // parse where clause if it exists AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } // parse actual enum variant definitions std::vector> enum_items = parse_enum_items ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { skip_after_end_block (); return nullptr; } return std::unique_ptr ( new AST::Enum (std::move (enum_name), std::move (vis), std::move (generic_params), std::move (where_clause), std::move (enum_items), std::move (outer_attrs), locus)); } // Parses the enum variants inside an enum definiton. template std::vector> Parser::parse_enum_items () { std::vector> items; std::unique_ptr initial_item = parse_enum_item (); // Return empty item list if no field there if (initial_item == nullptr) return items; items.push_back (std::move (initial_item)); while (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); std::unique_ptr item = parse_enum_item (); if (item == nullptr) { // this would occur with a trailing comma, which is allowed break; } items.push_back (std::move (item)); } items.shrink_to_fit (); return items; /* TODO: use template if doable (parse_non_ptr_sequence) */ } // Parses the enum variants inside an enum definiton. 
template template std::vector> Parser::parse_enum_items (EndTokenPred is_end_tok) { std::vector> items; std::unique_ptr initial_item = parse_enum_item (); // Return empty item list if no field there if (initial_item == nullptr) return items; items.push_back (std::move (initial_item)); while (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); if (is_end_tok (lexer.peek_token ()->get_id ())) break; std::unique_ptr item = parse_enum_item (); if (item == nullptr) { /* TODO should this ignore all successfully parsed enum items just * because one failed? */ Error error (lexer.peek_token ()->get_locus (), "failed to parse enum item in enum items"); add_error (std::move (error)); return {}; } items.push_back (std::move (item)); } items.shrink_to_fit (); return items; /* TODO: use template if doable (parse_non_ptr_sequence) */ } /* Parses a single enum variant item in an enum definition. Does not parse * commas. */ template std::unique_ptr Parser::parse_enum_item () { // parse outer attributes if they exist AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility, which may or may not exist AST::Visibility vis = parse_visibility (); // parse name for enum item, which is required const_TokenPtr item_name_tok = lexer.peek_token (); if (item_name_tok->get_id () != IDENTIFIER) { // this may not be an error but it means there is no enum item here return nullptr; } lexer.skip_token (); Identifier item_name = item_name_tok->get_str (); // branch based on next token const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_PAREN: { // tuple enum item lexer.skip_token (); std::vector tuple_fields; // Might be empty tuple for unit tuple enum variant. 
if (lexer.peek_token ()->get_id () == RIGHT_PAREN) tuple_fields = std::vector (); else tuple_fields = parse_tuple_fields (); if (!skip_token (RIGHT_PAREN)) { // skip after somewhere return nullptr; } return std::unique_ptr (new AST::EnumItemTuple ( std::move (item_name), std::move (vis), std::move (tuple_fields), std::move (outer_attrs), item_name_tok->get_locus ())); } case LEFT_CURLY: { // struct enum item lexer.skip_token (); std::vector struct_fields = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { // skip after somewhere return nullptr; } return std::unique_ptr (new AST::EnumItemStruct ( std::move (item_name), std::move (vis), std::move (struct_fields), std::move (outer_attrs), item_name_tok->get_locus ())); } case EQUAL: { // discriminant enum item lexer.skip_token (); std::unique_ptr discriminant_expr = parse_expr (); return std::unique_ptr ( new AST::EnumItemDiscriminant (std::move (item_name), std::move (vis), std::move (discriminant_expr), std::move (outer_attrs), item_name_tok->get_locus ())); } default: // regular enum with just an identifier return std::unique_ptr ( new AST::EnumItem (std::move (item_name), std::move (vis), std::move (outer_attrs), item_name_tok->get_locus ())); } } // Parses a C-style (and C-compat) untagged union declaration. 
template std::unique_ptr Parser::parse_union (AST::Visibility vis, AST::AttrVec outer_attrs) { /* hack - "weak keyword" by finding identifier called "union" (lookahead in * item switch) */ const_TokenPtr union_keyword = expect_token (IDENTIFIER); rust_assert (union_keyword->get_str () == "union"); Location locus = union_keyword->get_locus (); // parse actual union name const_TokenPtr union_name_tok = expect_token (IDENTIFIER); if (union_name_tok == nullptr) { skip_after_next_block (); return nullptr; } Identifier union_name = union_name_tok->get_str (); // parse optional generic parameters std::vector> generic_params = parse_generic_params_in_angles (); // parse optional where clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } /* parse union inner items as "struct fields" because hey, syntax reuse. * Spec said so. */ std::vector union_fields = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); if (!skip_token (RIGHT_CURLY)) { // skip after somewhere return nullptr; } return std::unique_ptr ( new AST::Union (std::move (union_name), std::move (vis), std::move (generic_params), std::move (where_clause), std::move (union_fields), std::move (outer_attrs), locus)); } /* Parses a "constant item" (compile-time constant to maybe "inline" * throughout the program - like constexpr). 
*/
template std::unique_ptr
Parser::parse_const_item (AST::Visibility vis, AST::AttrVec outer_attrs)
{
  Location start_locus = lexer.peek_token ()->get_locus ();
  skip_token (CONST);

  /* The name is either a proper identifier or the `_` wildcard. Start from
   * the wildcard spelling and overwrite it if a real identifier follows. */
  const_TokenPtr name_tok = lexer.peek_token ();
  std::string name ("_");

  if (name_tok->get_id () == IDENTIFIER)
    {
      name = name_tok->get_str ();
      lexer.skip_token ();
    }
  else if (name_tok->get_id () == UNDERSCORE)
    {
      // name is already "_" - only the token needs consuming
      lexer.skip_token ();
    }
  else
    {
      add_error (
        Error (name_tok->get_locus (),
               "expected item name (identifier or %<_%>) in constant item "
               "declaration - found %qs",
               name_tok->get_token_description ()));

      skip_after_semicolon ();
      return nullptr;
    }

  if (!skip_token (COLON))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  // Type of the constant (required).
  std::unique_ptr const_type = parse_type ();

  if (!skip_token (EQUAL))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  // Value expression of the constant (required).
  std::unique_ptr value_expr = parse_expr ();

  if (!skip_token (SEMICOLON))
    {
      // skip somewhere?
      return nullptr;
    }

  return std::unique_ptr (
    new AST::ConstantItem (std::move (name), std::move (vis),
                           std::move (const_type), std::move (value_expr),
                           std::move (outer_attrs), start_locus));
}

// Parses a "static item" (static storage item, with 'static lifetime).
/* Returns nullptr if the identifier, ':', '=' or ';' is missing. */
template std::unique_ptr
Parser::parse_static_item (AST::Visibility vis, AST::AttrVec outer_attrs)
{
  Location start_locus = lexer.peek_token ()->get_locus ();
  skip_token (STATIC_TOK);

  // A `mut` directly after `static` marks the item as mutable.
  bool is_mut = lexer.peek_token ()->get_id () == MUT;
  if (is_mut)
    lexer.skip_token ();

  const_TokenPtr name_tok = expect_token (IDENTIFIER);
  if (name_tok == nullptr)
    return nullptr;
  Identifier name = name_tok->get_str ();

  if (!skip_token (COLON))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  // Type of the static item (required).
  std::unique_ptr item_type = parse_type ();

  if (!skip_token (EQUAL))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  // Initialiser expression of the static item (required).
  std::unique_ptr init_expr = parse_expr ();

  if (!skip_token (SEMICOLON))
    {
      // skip after somewhere
      return nullptr;
    }

  return std::unique_ptr (
    new AST::StaticItem (std::move (name), is_mut, std::move (item_type),
                         std::move (init_expr), std::move (vis),
                         std::move (outer_attrs), start_locus));
}

// Parses a trait definition item, including unsafe ones.
/* Returns nullptr if the trait name or either curly bracket is missing, or if
 * any item inside the trait body fails to parse. */
template std::unique_ptr
Parser::parse_trait (AST::Visibility vis, AST::AttrVec outer_attrs)
{
  Location start_locus = lexer.peek_token ()->get_locus ();

  // An optional `unsafe` may precede the `trait` keyword.
  bool is_unsafe = lexer.peek_token ()->get_id () == UNSAFE;
  if (is_unsafe)
    lexer.skip_token ();

  skip_token (TRAIT);

  // Trait name.
  const_TokenPtr name_tok = expect_token (IDENTIFIER);
  if (name_tok == nullptr)
    return nullptr;
  Identifier name = name_tok->get_str ();

  // Generic parameters (if they exist).
  std::vector> generics = parse_generic_params_in_angles ();

  // Bounds stay empty unless a colon introduces them; they end at `where` or
  // at the opening curly bracket.
  std::vector> bounds;
  if (lexer.peek_token ()->get_id () == COLON)
    {
      lexer.skip_token ();

      auto ends_bounds
        = [] (TokenId id) { return id == WHERE || id == LEFT_CURLY; };
      bounds = parse_type_param_bounds (ends_bounds);
      // bounds = parse_type_param_bounds ();
    }

  // Where clause (if it exists).
  AST::WhereClause where = parse_where_clause ();

  if (!skip_token (LEFT_CURLY))
    {
      skip_after_end_block ();
      return nullptr;
    }

  // Inner attributes (if they exist).
  AST::AttrVec inner_attrs = parse_inner_attributes ();

  // Items are collected until the closing curly bracket is the next token.
  std::vector> items;
  while (lexer.peek_token ()->get_id () != RIGHT_CURLY)
    {
      std::unique_ptr item = parse_trait_item ();

      if (item == nullptr)
        {
          add_error (Error (lexer.peek_token ()->get_locus (),
                            "failed to parse trait item in trait"));

          return nullptr;
        }
      items.push_back (std::move (item));
    }

  if (!skip_token (RIGHT_CURLY))
    {
      // skip after something
      return nullptr;
    }

  items.shrink_to_fit ();
  return std::unique_ptr (
    new AST::Trait (std::move (name), is_unsafe, std::move (generics),
                    std::move (bounds), std::move (where), std::move (items),
                    std::move (vis), std::move (outer_attrs),
                    std::move (inner_attrs), start_locus));
}

// Parses a trait item used inside traits
// (not trait, the Item).
/* Dispatches on the first token after any outer attributes: `type` goes to
 * parse_trait_type, `const` followed by an identifier goes to
 * parse_trait_const, and `const` (otherwise), `unsafe`, `extern` and `fn` are
 * parsed in place as a trait function or method. Any other token is handed to
 * parse_macro_invocation_semi. Returns nullptr on failure. */
template std::unique_ptr Parser::parse_trait_item ()
{
  // parse outer attributes (if they exist)
  AST::AttrVec outer_attrs = parse_outer_attributes ();

  // lookahead to determine what type of trait item to parse
  const_TokenPtr tok = lexer.peek_token ();
  switch (tok->get_id ())
    {
    case TYPE:
      return parse_trait_type (std::move (outer_attrs));
    case CONST:
      // disambiguate with function qualifier
      if (lexer.peek_token (1)->get_id () == IDENTIFIER)
        {
          return parse_trait_const (std::move (outer_attrs));
        }
      // else, fallthrough to function
      // TODO: find out how to disable gcc "implicit fallthrough" error
      gcc_fallthrough ();
    case UNSAFE:
    case EXTERN_TOK:
      case FN_TOK: {
        /* function and method can't be disambiguated by lookahead alone
         * (without a lot of work and waste), so either make a
         * "parse_trait_function_or_method" or parse here mostly and pass in
         * most parameters (or if short enough, parse whole thing here). */
        // parse function and method here

        // parse function or method qualifiers
        AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();

        // the result is not checked here; parsing continues with the name
        skip_token (FN_TOK);

        // parse function or method name
        const_TokenPtr ident_tok = expect_token (IDENTIFIER);
        if (ident_tok == nullptr)
          return nullptr;

        Identifier ident = ident_tok->get_str ();

        // parse generic params
        std::vector> generic_params = parse_generic_params_in_angles ();

        if (!skip_token (LEFT_PAREN))
          {
            // skip after somewhere?
            return nullptr;
          }

        /* now for function vs method disambiguation - method has opening
         * "self" param */
        AST::SelfParam self_param = parse_self_param ();
        /* FIXME: ensure that self param doesn't accidentally consume tokens
         * for a function */
        bool is_method = false;
        if (!self_param.is_error ())
          {
            is_method = true;

            /* skip comma so function and method regular params can be parsed
             * in same way */
            if (lexer.peek_token ()->get_id () == COMMA)
              lexer.skip_token ();
          }

        // parse trait function params
        std::vector function_params = parse_function_params (
          [] (TokenId id) { return id == RIGHT_PAREN; });

        if (!skip_token (RIGHT_PAREN))
          {
            // skip after somewhere?
            return nullptr;
          }

        // parse return type (optional)
        std::unique_ptr return_type = parse_function_return_type ();

        // parse where clause (optional)
        AST::WhereClause where_clause = parse_where_clause ();

        // parse semicolon or function definition (in block)
        const_TokenPtr t = lexer.peek_token ();
        std::unique_ptr definition = nullptr;
        switch (t->get_id ())
          {
          case SEMICOLON:
            lexer.skip_token ();
            // definition is already nullptr, so don't need to change it
            break;
          case LEFT_CURLY:
            definition = parse_block_expr ();
            /* FIXME: are these outer attributes meant to be passed into the
             * block? */
            break;
          default:
            add_error (
              Error (t->get_locus (),
                     "expected %<;%> or definiton at the end of trait %s "
                     "definition - found %qs instead",
                     is_method ? "method" : "function",
                     t->get_token_description ()));

            // skip?
            return nullptr;
          }

        // do actual if instead of ternary for return value optimisation
        if (is_method)
          {
            AST::TraitMethodDecl method_decl (std::move (ident),
                                              std::move (qualifiers),
                                              std::move (generic_params),
                                              std::move (self_param),
                                              std::move (function_params),
                                              std::move (return_type),
                                              std::move (where_clause));

            // TODO: does this (method_decl) need move?
            return std::unique_ptr (
              new AST::TraitItemMethod (std::move (method_decl),
                                        std::move (definition),
                                        std::move (outer_attrs),
                                        tok->get_locus ()));
          }
        else
          {
            AST::TraitFunctionDecl function_decl (std::move (ident),
                                                  std::move (qualifiers),
                                                  std::move (generic_params),
                                                  std::move (function_params),
                                                  std::move (return_type),
                                                  std::move (where_clause));

            return std::unique_ptr (new AST::TraitItemFunc (
              std::move (function_decl), std::move (definition),
              std::move (outer_attrs), tok->get_locus ()));
          }
      }
      default: {
        // TODO: try and parse macro invocation semi - if fails, maybe error.
        std::unique_ptr macro_invoc
          = parse_macro_invocation_semi (outer_attrs);

        if (macro_invoc == nullptr)
          {
            // TODO: error?
            return nullptr;
          }
        else
          {
            return macro_invoc;
          }
        /* FIXME: macro invocations can only start with certain tokens. be
         * more picky with these? */
      }
    }
}

/* Parse a typedef trait item: `type`, an identifier, an optional ':' followed
 * by type param bounds, then ';'. Returns nullptr if the identifier or the
 * semicolon is missing. */
template std::unique_ptr Parser::parse_trait_type (AST::AttrVec outer_attrs)
{
  Location locus = lexer.peek_token ()->get_locus ();
  skip_token (TYPE);

  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
  if (ident_tok == nullptr)
    return nullptr;

  Identifier ident = ident_tok->get_str ();

  std::vector> bounds;

  // parse optional colon
  if (lexer.peek_token ()->get_id () == COLON)
    {
      lexer.skip_token ();

      // parse optional type param bounds, which end at the semicolon
      bounds
        = parse_type_param_bounds ([] (TokenId id) { return id == SEMICOLON; });
      // bounds = parse_type_param_bounds ();
    }

  if (!skip_token (SEMICOLON))
    {
      // skip?
      return nullptr;
    }

  return std::unique_ptr (
    new AST::TraitItemType (std::move (ident), std::move (bounds),
                            std::move (outer_attrs), locus));
}

// Parses a constant trait item.
template std::unique_ptr Parser::parse_trait_const (AST::AttrVec outer_attrs) { Location locus = lexer.peek_token ()->get_locus (); skip_token (CONST); // parse constant item name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident = ident_tok->get_str (); if (!skip_token (COLON)) { skip_after_semicolon (); return nullptr; } // parse constant trait item type std::unique_ptr type = parse_type (); // parse constant trait body expression, if it exists std::unique_ptr const_body = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { lexer.skip_token (); // expression must exist, so parse it const_body = parse_expr (); } if (!skip_token (SEMICOLON)) { // skip after something? return nullptr; } return std::unique_ptr ( new AST::TraitItemConst (std::move (ident), std::move (type), std::move (const_body), std::move (outer_attrs), locus)); } /* Parses a struct "impl" item (both inherent impl and trait impl can be * parsed here), */ template std::unique_ptr Parser::parse_impl (AST::Visibility vis, AST::AttrVec outer_attrs) { /* Note that only trait impls are allowed to be unsafe. So if unsafe, it * must be a trait impl. However, this isn't enough for full disambiguation, * so don't branch here. */ Location locus = lexer.peek_token ()->get_locus (); bool is_unsafe = false; if (lexer.peek_token ()->get_id () == UNSAFE) { lexer.skip_token (); is_unsafe = true; } if (!skip_token (IMPL)) { skip_after_next_block (); return nullptr; } // parse generic params (shared by trait and inherent impls) std::vector> generic_params = parse_generic_params_in_angles (); // Again, trait impl-only feature, but optional one, so can be used for // branching yet. bool has_exclam = false; if (lexer.peek_token ()->get_id () == EXCLAM) { lexer.skip_token (); has_exclam = true; } /* FIXME: code that doesn't look shit for TypePath. Also, make sure this * doesn't parse too much and not work. 
*/ AST::TypePath type_path = parse_type_path (); if (type_path.is_error () || lexer.peek_token ()->get_id () != FOR) { /* cannot parse type path (or not for token next, at least), so must be * inherent impl */ // hacky conversion of TypePath stack object to Type pointer std::unique_ptr type = nullptr; if (!type_path.is_error ()) type = std::unique_ptr ( new AST::TypePath (std::move (type_path))); else type = parse_type (); // Type is required, so error if null if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in inherent impl"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse optional where clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { // TODO: does this still skip properly? skip_after_end_block (); return nullptr; } // parse inner attributes (optional) AST::AttrVec inner_attrs = parse_inner_attributes (); // parse inherent impl items std::vector> impl_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr impl_item = parse_inherent_impl_item (); if (impl_item == nullptr) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse inherent impl item in inherent impl"); add_error (std::move (error)); return nullptr; } impl_items.push_back (std::move (impl_item)); t = lexer.peek_token (); } if (!skip_token (RIGHT_CURLY)) { // skip somewhere return nullptr; } // DEBUG rust_debug ("successfully parsed inherent impl"); impl_items.shrink_to_fit (); return std::unique_ptr (new AST::InherentImpl ( std::move (impl_items), std::move (generic_params), std::move (type), std::move (where_clause), std::move (vis), std::move (inner_attrs), std::move (outer_attrs), locus)); } else { // type path must both be valid and next token is for, so trait impl if (!skip_token (FOR)) { skip_after_next_block (); return nullptr; } // parse type std::unique_ptr type = parse_type (); // ensure type is included as it 
is required if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in trait impl"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } // parse optional where clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (LEFT_CURLY)) { // TODO: does this still skip properly? skip_after_end_block (); return nullptr; } // parse inner attributes (optional) AST::AttrVec inner_attrs = parse_inner_attributes (); // parse trait impl items std::vector> impl_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr impl_item = parse_trait_impl_item (); if (impl_item == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse trait impl item in trait impl"); add_error (std::move (error)); return nullptr; } impl_items.push_back (std::move (impl_item)); t = lexer.peek_token (); // DEBUG rust_debug ("successfully parsed a trait impl item"); } // DEBUG rust_debug ("successfully finished trait impl items"); if (!skip_token (RIGHT_CURLY)) { // skip somewhere return nullptr; } // DEBUG rust_debug ("successfully parsed trait impl"); impl_items.shrink_to_fit (); return std::unique_ptr ( new AST::TraitImpl (std::move (type_path), is_unsafe, has_exclam, std::move (impl_items), std::move (generic_params), std::move (type), std::move (where_clause), std::move (vis), std::move (inner_attrs), std::move (outer_attrs), locus)); } } // Parses a single inherent impl item (item inside an inherent impl block). template std::unique_ptr Parser::parse_inherent_impl_item () { // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); // TODO: cleanup - currently an unreadable mess // branch on next token: const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: // FIXME: Arthur: Do we need to some lookahead here? 
return parse_macro_invocation_semi (outer_attrs); case SUPER: case SELF: case CRATE: case PUB: { // visibility, so not a macro invocation semi - must be constant, // function, or method AST::Visibility vis = parse_visibility (); // TODO: is a recursive call to parse_inherent_impl_item better? switch (lexer.peek_token ()->get_id ()) { case EXTERN_TOK: case UNSAFE: case FN_TOK: // function or method return parse_inherent_impl_function_or_method (std::move (vis), std::move ( outer_attrs)); case CONST: // lookahead to resolve production - could be function/method or // const item t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (std::move (vis), std::move (outer_attrs)); case UNSAFE: case EXTERN_TOK: case FN_TOK: return parse_inherent_impl_function_or_method (std::move (vis), std::move ( outer_attrs)); default: add_error (Error (t->get_locus (), "unexpected token %qs in some sort of const " "item in inherent impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } default: add_error ( Error (t->get_locus (), "unrecognised token %qs for item in inherent impl", t->get_token_description ())); // skip? 
return nullptr; } } case EXTERN_TOK: case UNSAFE: case FN_TOK: // function or method return parse_inherent_impl_function_or_method ( AST::Visibility::create_private (), std::move (outer_attrs)); case CONST: /* lookahead to resolve production - could be function/method or const * item */ t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (AST::Visibility::create_private (), std::move (outer_attrs)); case UNSAFE: case EXTERN_TOK: case FN_TOK: return parse_inherent_impl_function_or_method ( AST::Visibility::create_private (), std::move (outer_attrs)); default: add_error (Error (t->get_locus (), "unexpected token %qs in some sort of const item " "in inherent impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } gcc_unreachable (); default: add_error (Error (t->get_locus (), "unrecognised token %qs for item in inherent impl", t->get_token_description ())); // skip? return nullptr; } } /* For internal use only by parse_inherent_impl_item() - splits giant method * into smaller ones and prevents duplication of logic. Strictly, this parses * a function or method item inside an inherent impl item block. */ // TODO: make this a templated function with "return type" as type param - // InherentImplItem is this specialisation of the template while TraitImplItem // will be the other. 
template std::unique_ptr Parser::parse_inherent_impl_function_or_method ( AST::Visibility vis, AST::AttrVec outer_attrs) { Location locus = lexer.peek_token ()->get_locus (); // parse function or method qualifiers AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); skip_token (FN_TOK); // parse function or method name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) return nullptr; Identifier ident = ident_tok->get_str (); // parse generic params std::vector> generic_params = parse_generic_params_in_angles (); if (!skip_token (LEFT_PAREN)) { // skip after somewhere? return nullptr; } // now for function vs method disambiguation - method has opening "self" // param AST::SelfParam self_param = parse_self_param (); /* FIXME: ensure that self param doesn't accidently consume tokens for a * function one idea is to lookahead up to 4 tokens to see whether self is * one of them */ bool is_method = false; if (!self_param.is_error ()) { is_method = true; /* skip comma so function and method regular params can be parsed in * same way */ if (lexer.peek_token ()->get_id () == COMMA) lexer.skip_token (); } // parse trait function params std::vector function_params = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); if (!skip_token (RIGHT_PAREN)) { skip_after_end_block (); return nullptr; } // parse return type (optional) std::unique_ptr return_type = parse_function_return_type (); // parse where clause (optional) AST::WhereClause where_clause = parse_where_clause (); // parse function definition (in block) - semicolon not allowed if (lexer.peek_token ()->get_id () == SEMICOLON) { Error error (lexer.peek_token ()->get_locus (), "%s declaration in inherent impl not allowed - must have " "a definition", is_method ? 
"method" : "function"); add_error (std::move (error)); lexer.skip_token (); return nullptr; } std::unique_ptr body = parse_block_expr (); if (body == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse definition in inherent impl %s definition", is_method ? "method" : "function"); add_error (std::move (error)); skip_after_end_block (); return nullptr; } // do actual if instead of ternary for return value optimisation if (is_method) { return std::unique_ptr ( new AST::Method (std::move (ident), std::move (qualifiers), std::move (generic_params), std::move (self_param), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus)); } else { return std::unique_ptr ( new AST::Function (std::move (ident), std::move (qualifiers), std::move (generic_params), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus)); } } // Parses a single trait impl item (item inside a trait impl block). template std::unique_ptr Parser::parse_trait_impl_item () { // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); // TODO: clean this function up, it is basically unreadable hacks // branch on next token: const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: case SUPER: case SELF: case CRATE: case DOLLAR_SIGN: // these seem to be SimplePath tokens, so this is a macro invocation // semi return parse_macro_invocation_semi (std::move (outer_attrs)); case TYPE: return parse_type_alias (AST::Visibility::create_private (), std::move (outer_attrs)); case PUB: { // visibility, so not a macro invocation semi - must be constant, // function, or method AST::Visibility vis = parse_visibility (); // TODO: is a recursive call to parse_trait_impl_item better? 
switch (lexer.peek_token ()->get_id ()) { case TYPE: return parse_type_alias (std::move (vis), std::move (outer_attrs)); case EXTERN_TOK: case UNSAFE: case FN_TOK: // function or method return parse_trait_impl_function_or_method (std::move (vis), std::move ( outer_attrs)); case CONST: // lookahead to resolve production - could be function/method or // const item t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (std::move (vis), std::move (outer_attrs)); case UNSAFE: case EXTERN_TOK: case FN_TOK: return parse_trait_impl_function_or_method (std::move (vis), std::move ( outer_attrs)); default: add_error (Error (t->get_locus (), "unexpected token %qs in some sort of const " "item in trait impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } default: add_error (Error (t->get_locus (), "unrecognised token %qs for item in trait impl", t->get_token_description ())); // skip? return nullptr; } } case EXTERN_TOK: case UNSAFE: case FN_TOK: // function or method return parse_trait_impl_function_or_method ( AST::Visibility::create_private (), std::move (outer_attrs)); case CONST: // lookahead to resolve production - could be function/method or const // item t = lexer.peek_token (1); switch (t->get_id ()) { case IDENTIFIER: case UNDERSCORE: return parse_const_item (AST::Visibility::create_private (), std::move (outer_attrs)); case UNSAFE: case EXTERN_TOK: case FN_TOK: return parse_trait_impl_function_or_method ( AST::Visibility::create_private (), std::move (outer_attrs)); default: add_error (Error ( t->get_locus (), "unexpected token %qs in some sort of const item in trait impl", t->get_token_description ())); lexer.skip_token (1); // TODO: is this right thing to do? return nullptr; } gcc_unreachable (); default: add_error (Error (t->get_locus (), "unrecognised token %qs for item in trait impl", t->get_token_description ())); // skip? 
return nullptr; } } /* For internal use only by parse_trait_impl_item() - splits giant method into * smaller ones and prevents duplication of logic. Strictly, this parses a * function or method item inside a trait impl item block. */ template std::unique_ptr Parser::parse_trait_impl_function_or_method ( AST::Visibility vis, AST::AttrVec outer_attrs) { // this shares virtually all logic with // parse_inherent_impl_function_or_method // - template? Location locus = lexer.peek_token ()->get_locus (); // parse function or method qualifiers AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); skip_token (FN_TOK); // parse function or method name const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { return nullptr; } Identifier ident = ident_tok->get_str (); // DEBUG: rust_debug ( "about to start parsing generic params in trait impl function or method"); // parse generic params std::vector> generic_params = parse_generic_params_in_angles (); // DEBUG: rust_debug ( "finished parsing generic params in trait impl function or method"); if (!skip_token (LEFT_PAREN)) { // skip after somewhere? 
return nullptr; } // now for function vs method disambiguation - method has opening "self" // param AST::SelfParam self_param = parse_self_param (); // FIXME: ensure that self param doesn't accidently consume tokens for a // function bool is_method = false; if (!self_param.is_error ()) { is_method = true; // skip comma so function and method regular params can be parsed in // same way if (lexer.peek_token ()->get_id () == COMMA) { lexer.skip_token (); } // DEBUG rust_debug ("successfully parsed self param in method trait impl item"); } // DEBUG rust_debug ( "started to parse function params in function or method trait impl item"); // parse trait function params (only if next token isn't right paren) std::vector function_params; if (lexer.peek_token ()->get_id () != RIGHT_PAREN) { function_params = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); if (function_params.empty ()) { Error error ( lexer.peek_token ()->get_locus (), "failed to parse function params in trait impl %s definition", is_method ? "method" : "function"); add_error (std::move (error)); skip_after_next_block (); return nullptr; } } // DEBUG rust_debug ("successfully parsed function params in function or method " "trait impl item"); if (!skip_token (RIGHT_PAREN)) { skip_after_next_block (); return nullptr; } // parse return type (optional) std::unique_ptr return_type = parse_function_return_type (); // DEBUG rust_debug ( "successfully parsed return type in function or method trait impl item"); // parse where clause (optional) AST::WhereClause where_clause = parse_where_clause (); // DEBUG rust_debug ( "successfully parsed where clause in function or method trait impl item"); // parse function definition (in block) - semicolon not allowed if (lexer.peek_token ()->get_id () == SEMICOLON) { Error error ( lexer.peek_token ()->get_locus (), "%s declaration in trait impl not allowed - must have a definition", is_method ? 
"method" : "function"); add_error (std::move (error)); lexer.skip_token (); return nullptr; } std::unique_ptr body = parse_block_expr (); if (body == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse definition in trait impl %s definition", is_method ? "method" : "function"); add_error (std::move (error)); skip_after_end_block (); return nullptr; } // do actual if instead of ternary for return value optimisation if (is_method) { return std::unique_ptr ( new AST::Method (std::move (ident), std::move (qualifiers), std::move (generic_params), std::move (self_param), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus)); } else { return std::unique_ptr ( new AST::Function (std::move (ident), std::move (qualifiers), std::move (generic_params), std::move (function_params), std::move (return_type), std::move (where_clause), std::move (body), std::move (vis), std::move (outer_attrs), locus)); } } // Parses an extern block of declarations. 
template std::unique_ptr Parser::parse_extern_block (AST::Visibility vis, AST::AttrVec outer_attrs) { Location locus = lexer.peek_token ()->get_locus (); skip_token (EXTERN_TOK); // detect optional abi name std::string abi; const_TokenPtr next_tok = lexer.peek_token (); if (next_tok->get_id () == STRING_LITERAL) { lexer.skip_token (); abi = next_tok->get_str (); } if (!skip_token (LEFT_CURLY)) { skip_after_end_block (); return nullptr; } AST::AttrVec inner_attrs = parse_inner_attributes (); // parse declarations inside extern block std::vector> extern_items; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_CURLY) { std::unique_ptr extern_item = parse_external_item (); if (extern_item == nullptr) { Error error (t->get_locus (), "failed to parse external item despite not reaching " "end of extern block"); add_error (std::move (error)); return nullptr; } extern_items.push_back (std::move (extern_item)); t = lexer.peek_token (); } if (!skip_token (RIGHT_CURLY)) { // skip somewhere return nullptr; } extern_items.shrink_to_fit (); return std::unique_ptr ( new AST::ExternBlock (std::move (abi), std::move (extern_items), std::move (vis), std::move (inner_attrs), std::move (outer_attrs), locus)); } // Parses a single extern block item (static or function declaration). 
template std::unique_ptr Parser::parse_external_item () { // parse optional outer attributes AST::AttrVec outer_attrs = parse_outer_attributes (); Location locus = lexer.peek_token ()->get_locus (); // parse optional visibility AST::Visibility vis = parse_visibility (); const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case IDENTIFIER: return parse_macro_invocation_semi (outer_attrs); case STATIC_TOK: { // parse extern static item lexer.skip_token (); // parse mut (optional) bool has_mut = false; if (lexer.peek_token ()->get_id () == MUT) { lexer.skip_token (); has_mut = true; } // parse identifier const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { skip_after_semicolon (); return nullptr; } Identifier ident = ident_tok->get_str (); if (!skip_token (COLON)) { skip_after_semicolon (); return nullptr; } // parse type (required) std::unique_ptr type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in external static item"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } if (!skip_token (SEMICOLON)) { // skip after somewhere? 
return nullptr; } return std::unique_ptr ( new AST::ExternalStaticItem (std::move (ident), std::move (type), has_mut, std::move (vis), std::move (outer_attrs), locus)); } case FN_TOK: { // parse extern function declaration item // skip function token lexer.skip_token (); // parse identifier const_TokenPtr ident_tok = expect_token (IDENTIFIER); if (ident_tok == nullptr) { skip_after_semicolon (); return nullptr; } Identifier ident = ident_tok->get_str (); // parse (optional) generic params std::vector> generic_params = parse_generic_params_in_angles (); if (!skip_token (LEFT_PAREN)) { skip_after_semicolon (); return nullptr; } // parse parameters std::vector function_params; bool is_variadic = false; AST::AttrVec variadic_attrs; const_TokenPtr t = lexer.peek_token (); while (t->get_id () != RIGHT_PAREN) { AST::AttrVec maybe_variadic_attrs = parse_outer_attributes (); if (lexer.peek_token ()->get_id () == ELLIPSIS) { // variadic - use attrs for this lexer.skip_token (); is_variadic = true; variadic_attrs = std::move (maybe_variadic_attrs); t = lexer.peek_token (); if (t->get_id () != RIGHT_PAREN) { Error error (t->get_locus (), "expected right parentheses after variadic in " "named function " "parameters, found %qs", t->get_token_description ()); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } break; } AST::NamedFunctionParam param = parse_named_function_param (std::move (maybe_variadic_attrs)); if (param.is_error ()) { Error error (t->get_locus (), "could not parse named function " "parameter in external function"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } function_params.push_back (std::move (param)); if (lexer.peek_token ()->get_id () != COMMA) break; // skip comma lexer.skip_token (); t = lexer.peek_token (); } if (!skip_token (RIGHT_PAREN)) { skip_after_semicolon (); return nullptr; } // parse (optional) return type std::unique_ptr return_type = parse_function_return_type (); // parse (optional) where 
clause AST::WhereClause where_clause = parse_where_clause (); if (!skip_token (SEMICOLON)) { // skip somewhere? return nullptr; } function_params.shrink_to_fit (); return std::unique_ptr ( new AST::ExternalFunctionItem ( std::move (ident), std::move (generic_params), std::move (return_type), std::move (where_clause), std::move (function_params), is_variadic, std::move (variadic_attrs), std::move (vis), std::move (outer_attrs), locus)); } default: // error add_error ( Error (t->get_locus (), "unrecognised token %qs in extern block item declaration", t->get_token_description ())); skip_after_semicolon (); return nullptr; } } /* Parses an extern block function param (with "pattern" being _ or an * identifier). */ template AST::NamedFunctionParam Parser::parse_named_function_param ( AST::AttrVec outer_attrs) { // parse identifier/_ std::string name; const_TokenPtr t = lexer.peek_token (); Location name_location = t->get_locus (); switch (t->get_id ()) { case IDENTIFIER: name = t->get_str (); lexer.skip_token (); break; case UNDERSCORE: name = "_"; lexer.skip_token (); break; default: // this is not a function param, but not necessarily an error return AST::NamedFunctionParam::create_error (); } if (!skip_token (COLON)) { // skip after somewhere? return AST::NamedFunctionParam::create_error (); } // parse (required) type std::unique_ptr param_type = parse_type (); if (param_type == nullptr) { Error error ( lexer.peek_token ()->get_locus (), "could not parse param type in extern block function declaration"); add_error (std::move (error)); skip_after_semicolon (); return AST::NamedFunctionParam::create_error (); } return AST::NamedFunctionParam (std::move (name), std::move (param_type), std::move (outer_attrs), name_location); } // Parses a statement (will further disambiguate any statement). template std::unique_ptr Parser::parse_stmt (ParseRestrictions restrictions) { // quick exit for empty statement // FIXME: Can we have empty statements without semicolons? 
Just nothing? const_TokenPtr t = lexer.peek_token (); if (t->get_id () == SEMICOLON) { lexer.skip_token (); return std::unique_ptr ( new AST::EmptyStmt (t->get_locus ())); } // parse outer attributes AST::AttrVec outer_attrs = parse_outer_attributes (); // parsing this will be annoying because of the many different possibilities /* best may be just to copy paste in parse_item switch, and failing that try * to parse outer attributes, and then pass them in to either a let * statement or (fallback) expression statement. */ // FIXME: think of a way to do this without such a large switch? t = lexer.peek_token (); switch (t->get_id ()) { case LET: // let statement return parse_let_stmt (std::move (outer_attrs), restrictions); case PUB: case MOD: case EXTERN_TOK: case USE: case FN_TOK: case TYPE: case STRUCT_TOK: case ENUM_TOK: case CONST: case STATIC_TOK: case TRAIT: case IMPL: /* TODO: implement union keyword but not really because of * context-dependence crappy hack way to parse a union written below to * separate it from the good code. */ // case UNION: case UNSAFE: // maybe - unsafe traits are a thing /* if any of these (should be all possible VisItem prefixes), parse a * VisItem can't parse item because would require reparsing outer * attributes */ return parse_vis_item (std::move (outer_attrs)); break; case SUPER: case SELF: case CRATE: case DOLLAR_SIGN: // almost certainly macro invocation semi return parse_macro_item (std::move (outer_attrs)); break; // crappy hack to do union "keyword" case IDENTIFIER: if (t->get_str () == "union" && lexer.peek_token (1)->get_id () == IDENTIFIER) { return parse_vis_item (std::move (outer_attrs)); // or should this go straight to parsing union? } else if (t->get_str () == "macro_rules") { // macro_rules! 
macro item return parse_macro_item (std::move (outer_attrs)); } else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION || lexer.peek_token (1)->get_id () == EXCLAM) { // FIXME: ensure doesn't take any expressions by mistake /* path (probably) or macro invocation, so probably a macro * invocation semi */ return parse_macro_item (std::move (outer_attrs)); } gcc_fallthrough (); // TODO: find out how to disable gcc "implicit fallthrough" warning default: // fallback: expression statement return parse_expr_stmt (std::move (outer_attrs), restrictions); break; } } // Parses a let statement. template std::unique_ptr Parser::parse_let_stmt (AST::AttrVec outer_attrs, ParseRestrictions restrictions) { Location locus = lexer.peek_token ()->get_locus (); skip_token (LET); // parse pattern (required) std::unique_ptr pattern = parse_pattern (); if (pattern == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse pattern in let statement"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } // parse type declaration (optional) std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == COLON) { // must have a type declaration lexer.skip_token (); type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse type in let statement"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } } // parse expression to set variable to (optional) std::unique_ptr expr = nullptr; if (lexer.peek_token ()->get_id () == EQUAL) { // must have an expression lexer.skip_token (); expr = parse_expr (); if (expr == nullptr) { Error error (lexer.peek_token ()->get_locus (), "failed to parse expression in let statement"); add_error (std::move (error)); skip_after_semicolon (); return nullptr; } } if (restrictions.consume_semi) if (!skip_token (SEMICOLON)) return nullptr; return std::unique_ptr ( new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type), 
std::move (outer_attrs), locus)); } // Parses a type path. template AST::TypePath Parser::parse_type_path () { bool has_opening_scope_resolution = false; Location locus = lexer.peek_token ()->get_locus (); if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) { has_opening_scope_resolution = true; lexer.skip_token (); } // create segment vector std::vector> segments; // parse required initial segment std::unique_ptr initial_segment = parse_type_path_segment (); if (initial_segment == nullptr) { // skip after somewhere? // don't necessarily throw error but yeah return AST::TypePath::create_error (); } segments.push_back (std::move (initial_segment)); // parse optional segments (as long as scope resolution operator exists) const_TokenPtr t = lexer.peek_token (); while (t->get_id () == SCOPE_RESOLUTION) { // skip scope resolution operator lexer.skip_token (); // parse the actual segment - it is an error if it doesn't exist now std::unique_ptr segment = parse_type_path_segment (); if (segment == nullptr) { // skip after somewhere? Error error (t->get_locus (), "could not parse type path segment"); add_error (std::move (error)); return AST::TypePath::create_error (); } segments.push_back (std::move (segment)); t = lexer.peek_token (); } segments.shrink_to_fit (); return AST::TypePath (std::move (segments), locus, has_opening_scope_resolution); } template AST::GenericArg Parser::parse_generic_arg () { auto tok = lexer.peek_token (); std::unique_ptr expr = nullptr; switch (tok->get_id ()) { case IDENTIFIER: { // This is a bit of a weird situation: With an identifier token, we // could either have a valid type or a macro (FIXME: anything else?). 
So // we need one bit of lookahead to differentiate if this is really auto next_tok = lexer.peek_token (1); if (next_tok->get_id () == EXCLAM) { auto type = parse_type (); if (type) return AST::GenericArg::create_type (std::move (type)); else return AST::GenericArg::create_error (); } lexer.skip_token (); return AST::GenericArg::create_ambiguous (tok->get_str (), tok->get_locus ()); } case LEFT_CURLY: expr = parse_block_expr (); break; case MINUS: case STRING_LITERAL: case CHAR_LITERAL: case INT_LITERAL: case FLOAT_LITERAL: case TRUE_LITERAL: case FALSE_LITERAL: expr = parse_literal_expr (); break; // FIXME: Because of this, error reporting is garbage for const generic // parameter's default values default: { auto type = parse_type (); // FIXME: Find a better way to do this? if (type) return AST::GenericArg::create_type (std::move (type)); else return AST::GenericArg::create_error (); } } if (!expr) return AST::GenericArg::create_error (); return AST::GenericArg::create_const (std::move (expr)); } // Parses the generic arguments in each path segment. template AST::GenericArgs Parser::parse_path_generic_args () { if (!skip_token (LEFT_ANGLE)) { // skip after somewhere? return AST::GenericArgs::create_empty (); } // We need to parse all lifetimes, then parse types and const generics in // any order. 
// try to parse lifetimes first std::vector lifetime_args; const_TokenPtr t = lexer.peek_token (); Location locus = t->get_locus (); while (!is_right_angle_tok (t->get_id ())) { AST::Lifetime lifetime = parse_lifetime (); if (lifetime.is_error ()) { // not necessarily an error break; } lifetime_args.push_back (std::move (lifetime)); // if next token isn't comma, then it must be end of list if (lexer.peek_token ()->get_id () != COMMA) { break; } // skip comma lexer.skip_token (); t = lexer.peek_token (); } // try to parse types and const generics second std::vector generic_args; // TODO: think of better control structure t = lexer.peek_token (); while (!is_right_angle_tok (t->get_id ())) { // FIXME: Is it fine to break if there is one binding? Can't there be // bindings in between types? // ensure not binding being parsed as type accidently if (t->get_id () == IDENTIFIER && lexer.peek_token (1)->get_id () == EQUAL) break; auto arg = parse_generic_arg (); if (!arg.is_error ()) { generic_args.emplace_back (std::move (arg)); } // FIXME: Do we need to break if we encounter an error? 
// if next token isn't comma, then it must be end of list if (lexer.peek_token ()->get_id () != COMMA) break; // skip comma lexer.skip_token (); t = lexer.peek_token (); } // try to parse bindings third std::vector binding_args; // TODO: think of better control structure t = lexer.peek_token (); while (!is_right_angle_tok (t->get_id ())) { AST::GenericArgsBinding binding = parse_generic_args_binding (); if (binding.is_error ()) { // not necessarily an error break; } binding_args.push_back (std::move (binding)); // if next token isn't comma, then it must be end of list if (lexer.peek_token ()->get_id () != COMMA) { break; } // skip comma lexer.skip_token (); t = lexer.peek_token (); } // skip any trailing commas if (lexer.peek_token ()->get_id () == COMMA) lexer.skip_token (); if (!skip_generics_right_angle ()) return AST::GenericArgs::create_empty (); lifetime_args.shrink_to_fit (); generic_args.shrink_to_fit (); binding_args.shrink_to_fit (); return AST::GenericArgs (std::move (lifetime_args), std::move (generic_args), std::move (binding_args), locus); } // Parses a binding in a generic args path segment. template AST::GenericArgsBinding Parser::parse_generic_args_binding () { const_TokenPtr ident_tok = lexer.peek_token (); if (ident_tok->get_id () != IDENTIFIER) { // allow non error-inducing use // skip somewhere? return AST::GenericArgsBinding::create_error (); } lexer.skip_token (); Identifier ident = ident_tok->get_str (); if (!skip_token (EQUAL)) { // skip after somewhere? return AST::GenericArgsBinding::create_error (); } // parse type (required) std::unique_ptr type = parse_type (); if (type == nullptr) { // skip somewhere? return AST::GenericArgsBinding::create_error (); } return AST::GenericArgsBinding (std::move (ident), std::move (type), ident_tok->get_locus ()); } /* Parses a single type path segment (not including opening scope resolution, * but includes any internal ones). Includes generic args or type path * functions too. 
*/ template std::unique_ptr Parser::parse_type_path_segment () { Location locus = lexer.peek_token ()->get_locus (); // parse ident segment part AST::PathIdentSegment ident_segment = parse_path_ident_segment (); if (ident_segment.is_error ()) { // not necessarily an error return nullptr; } /* lookahead to determine if variants exist - only consume scope resolution * then */ bool has_separating_scope_resolution = false; const_TokenPtr next = lexer.peek_token (1); if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION && (next->get_id () == LEFT_ANGLE || next->get_id () == LEFT_PAREN)) { has_separating_scope_resolution = true; lexer.skip_token (); } // branch into variants on next token const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) { case LEFT_ANGLE: { // parse generic args AST::GenericArgs generic_args = parse_path_generic_args (); return std::unique_ptr ( new AST::TypePathSegmentGeneric (std::move (ident_segment), has_separating_scope_resolution, std::move (generic_args), locus)); } case LEFT_PAREN: { // parse type path function AST::TypePathFunction type_path_function = parse_type_path_function (locus); if (type_path_function.is_error ()) { // skip after somewhere? return nullptr; } return std::unique_ptr ( new AST::TypePathSegmentFunction (std::move (ident_segment), has_separating_scope_resolution, std::move (type_path_function), locus)); } default: // neither of them return std::unique_ptr ( new AST::TypePathSegment (std::move (ident_segment), has_separating_scope_resolution, locus)); } gcc_unreachable (); } // Parses a function call representation inside a type path. template AST::TypePathFunction Parser::parse_type_path_function (Location id_location) { if (!skip_token (LEFT_PAREN)) { // skip somewhere? 
return AST::TypePathFunction::create_error (); } // parse function inputs std::vector> inputs; while (lexer.peek_token ()->get_id () != RIGHT_PAREN) { std::unique_ptr type = parse_type (); if (type == nullptr) { /* this is an error as there should've been a ')' there if there * wasn't a type */ Error error ( lexer.peek_token ()->get_locus (), "failed to parse type in parameters of type path function"); add_error (std::move (error)); // skip somewhere? return AST::TypePathFunction::create_error (); } inputs.push_back (std::move (type)); // skip commas, including trailing commas if (lexer.peek_token ()->get_id () != COMMA) break; lexer.skip_token (); } if (!skip_token (RIGHT_PAREN)) { // skip somewhere? return AST::TypePathFunction::create_error (); } // parse optional return type std::unique_ptr return_type = parse_function_return_type (); inputs.shrink_to_fit (); return AST::TypePathFunction (std::move (inputs), id_location, std::move (return_type)); } // Parses a path inside an expression that allows generic arguments. template AST::PathInExpression Parser::parse_path_in_expression () { Location locus = Linemap::unknown_location (); bool has_opening_scope_resolution = false; if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) { has_opening_scope_resolution = true; locus = lexer.peek_token ()->get_locus (); lexer.skip_token (); } // create segment vector std::vector segments; if (locus == Linemap::unknown_location ()) { locus = lexer.peek_token ()->get_locus (); } // parse required initial segment AST::PathExprSegment initial_segment = parse_path_expr_segment (); if (initial_segment.is_error ()) { // skip after somewhere? 
// don't necessarily throw error but yeah return AST::PathInExpression::create_error (); } segments.push_back (std::move (initial_segment)); // parse optional segments (as long as scope resolution operator exists) const_TokenPtr t = lexer.peek_token (); while (t->get_id () == SCOPE_RESOLUTION) { // skip scope resolution operator lexer.skip_token (); // parse the actual segment - it is an error if it doesn't exist now AST::PathExprSegment segment = parse_path_expr_segment (); if (segment.is_error ()) { // skip after somewhere? Error error (t->get_locus (), "could not parse path expression segment"); add_error (std::move (error)); return AST::PathInExpression::create_error (); } segments.push_back (std::move (segment)); t = lexer.peek_token (); } segments.shrink_to_fit (); return AST::PathInExpression (std::move (segments), {}, locus, has_opening_scope_resolution); } /* Parses a single path in expression path segment (including generic * arguments). */ template AST::PathExprSegment Parser::parse_path_expr_segment () { Location locus = lexer.peek_token ()->get_locus (); // parse ident segment AST::PathIdentSegment ident = parse_path_ident_segment (); if (ident.is_error ()) { // not necessarily an error? return AST::PathExprSegment::create_error (); } // parse generic args (and turbofish), if they exist /* use lookahead to determine if they actually exist (don't want to * accidently parse over next ident segment) */ if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION && lexer.peek_token (1)->get_id () == LEFT_ANGLE) { // skip scope resolution lexer.skip_token (); AST::GenericArgs generic_args = parse_path_generic_args (); return AST::PathExprSegment (std::move (ident), locus, std::move (generic_args)); } // return a generic parameter-less expr segment if not found return AST::PathExprSegment (std::move (ident), locus); } /* Parses a fully qualified path in expression (i.e. a pattern). FIXME does * not parse outer attrs. 
*/ template AST::QualifiedPathInExpression Parser::parse_qualified_path_in_expression ( Location pratt_parsed_loc) { /* Note: the Rust grammar is defined in such a way that it is impossible to * determine whether a prospective qualified path is a * QualifiedPathInExpression or QualifiedPathInType in all cases by the * rules themselves (the only possible difference is a TypePathSegment with * function, and lookahead to find this is too difficult). However, as this * is a pattern and QualifiedPathInType is a type, I believe it that their * construction will not be confused (due to rules regarding patterns vs * types). * As such, this function will not attempt to minimise errors created by * their confusion. */ // parse the qualified path type (required) AST::QualifiedPathType qual_path_type = parse_qualified_path_type (pratt_parsed_loc); if (qual_path_type.is_error ()) { // TODO: should this create a parse error? return AST::QualifiedPathInExpression::create_error (); } Location locus = qual_path_type.get_locus (); // parse path segments std::vector segments; // parse initial required segment if (!expect_token (SCOPE_RESOLUTION)) { // skip after somewhere? return AST::QualifiedPathInExpression::create_error (); } AST::PathExprSegment initial_segment = parse_path_expr_segment (); if (initial_segment.is_error ()) { // skip after somewhere? 
Error error (lexer.peek_token ()->get_locus (), "required initial path expression segment in " "qualified path in expression could not be parsed"); add_error (std::move (error)); return AST::QualifiedPathInExpression::create_error (); } segments.push_back (std::move (initial_segment)); // parse optional segments (as long as scope resolution operator exists) const_TokenPtr t = lexer.peek_token (); while (t->get_id () == SCOPE_RESOLUTION) { // skip scope resolution operator lexer.skip_token (); // parse the actual segment - it is an error if it doesn't exist now AST::PathExprSegment segment = parse_path_expr_segment (); if (segment.is_error ()) { // skip after somewhere? Error error (t->get_locus (), "could not parse path expression segment in qualified " "path in expression"); add_error (std::move (error)); return AST::QualifiedPathInExpression::create_error (); } segments.push_back (std::move (segment)); t = lexer.peek_token (); } segments.shrink_to_fit (); // FIXME: outer attr parsing return AST::QualifiedPathInExpression (std::move (qual_path_type), std::move (segments), {}, locus); } // Parses the type syntactical construction at the start of a qualified path. template AST::QualifiedPathType Parser::parse_qualified_path_type ( Location pratt_parsed_loc) { Location locus = pratt_parsed_loc; /* TODO: should this actually be error? is there anywhere where this could * be valid? */ if (locus == Linemap::unknown_location ()) { locus = lexer.peek_token ()->get_locus (); if (!skip_token (LEFT_ANGLE)) { // skip after somewhere? return AST::QualifiedPathType::create_error (); } } // parse type (required) std::unique_ptr type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in qualified path type"); add_error (std::move (error)); // skip somewhere? 
return AST::QualifiedPathType::create_error (); } // parse optional as clause AST::TypePath as_type_path = AST::TypePath::create_error (); if (lexer.peek_token ()->get_id () == AS) { lexer.skip_token (); // parse type path, which is required now as_type_path = parse_type_path (); if (as_type_path.is_error ()) { Error error ( lexer.peek_token ()->get_locus (), "could not parse type path in as clause in qualified path type"); add_error (std::move (error)); // skip somewhere? return AST::QualifiedPathType::create_error (); } } /* NOTE: should actually be a right-angle token, so * skip_generics_right_angle shouldn't be required */ if (!skip_token (RIGHT_ANGLE)) { // skip after somewhere? return AST::QualifiedPathType::create_error (); } return AST::QualifiedPathType (std::move (type), locus, std::move (as_type_path)); } // Parses a fully qualified path in type (i.e. a type). template AST::QualifiedPathInType Parser::parse_qualified_path_in_type () { Location locus = lexer.peek_token ()->get_locus (); // parse the qualified path type (required) AST::QualifiedPathType qual_path_type = parse_qualified_path_type (); if (qual_path_type.is_error ()) { // TODO: should this create a parse error? return AST::QualifiedPathInType::create_error (); } // parse initial required segment if (!expect_token (SCOPE_RESOLUTION)) { // skip after somewhere? return AST::QualifiedPathInType::create_error (); } std::unique_ptr initial_segment = parse_type_path_segment (); if (initial_segment == nullptr) { // skip after somewhere? 
Error error (lexer.peek_token ()->get_locus (), "required initial type path segment in qualified path in " "type could not be parsed"); add_error (std::move (error)); return AST::QualifiedPathInType::create_error (); } // parse optional segments (as long as scope resolution operator exists) std::vector> segments; const_TokenPtr t = lexer.peek_token (); while (t->get_id () == SCOPE_RESOLUTION) { // skip scope resolution operator lexer.skip_token (); // parse the actual segment - it is an error if it doesn't exist now std::unique_ptr segment = parse_type_path_segment (); if (segment == nullptr) { // skip after somewhere? Error error ( t->get_locus (), "could not parse type path segment in qualified path in type"); add_error (std::move (error)); return AST::QualifiedPathInType::create_error (); } segments.push_back (std::move (segment)); t = lexer.peek_token (); } segments.shrink_to_fit (); return AST::QualifiedPathInType (std::move (qual_path_type), std::move (initial_segment), std::move (segments), locus); } // Parses a self param. Also handles self param not existing. template AST::SelfParam Parser::parse_self_param () { bool has_reference = false; AST::Lifetime lifetime = AST::Lifetime::error (); Location locus = lexer.peek_token ()->get_locus (); // test if self is a reference parameter if (lexer.peek_token ()->get_id () == AMP) { has_reference = true; lexer.skip_token (); // now test whether it has a lifetime if (lexer.peek_token ()->get_id () == LIFETIME) { lifetime = parse_lifetime (); // something went wrong somehow if (lifetime.is_error ()) { Error error (lexer.peek_token ()->get_locus (), "failed to parse lifetime in self param"); add_error (std::move (error)); // skip after somewhere? 
return AST::SelfParam::create_error (); } } } // test for mut bool has_mut = false; if (lexer.peek_token ()->get_id () == MUT) { has_mut = true; lexer.skip_token (); } // skip self token const_TokenPtr self_tok = lexer.peek_token (); if (self_tok->get_id () != SELF) { // skip after somewhere? return AST::SelfParam::create_error (); } lexer.skip_token (); // parse optional type std::unique_ptr type = nullptr; if (lexer.peek_token ()->get_id () == COLON) { lexer.skip_token (); // type is now required type = parse_type (); if (type == nullptr) { Error error (lexer.peek_token ()->get_locus (), "could not parse type in self param"); add_error (std::move (error)); // skip after somewhere? return AST::SelfParam::create_error (); } } // ensure that cannot have both type and reference if (type != nullptr && has_reference) { Error error ( lexer.peek_token ()->get_locus (), "cannot have both a reference and a type specified in a self param"); add_error (std::move (error)); // skip after somewhere? return AST::SelfParam::create_error (); } if (has_reference) { return AST::SelfParam (std::move (lifetime), has_mut, locus); } else { // note that type may be nullptr here and that's fine return AST::SelfParam (std::move (type), has_mut, locus); } }