// Copyright (C) 2020-2023 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// .
/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before
* Parser was template). Separated from rust-parse.h for readability. */
/* DO NOT INCLUDE ANYWHERE - this is automatically included with rust-parse.h
* This is also the reason why there are no include guards. */
#define INCLUDE_ALGORITHM
#include "rust-diagnostics.h"
#include "rust-make-unique.h"
namespace Rust {
// Left binding powers of operations.
enum binding_powers
{
// Highest priority
LBP_HIGHEST = 100,
LBP_PATH = 95,
LBP_METHOD_CALL = 90,
LBP_FIELD_EXPR = 85,
LBP_FUNCTION_CALL = 80,
LBP_ARRAY_REF = LBP_FUNCTION_CALL,
LBP_QUESTION_MARK = 75, // unary postfix - counts as left
LBP_UNARY_PLUS = 70, // Used only when the null denotation is +
LBP_UNARY_MINUS = LBP_UNARY_PLUS, // Used only when the null denotation is -
LBP_UNARY_ASTERISK = LBP_UNARY_PLUS, // deref operator - unary prefix
LBP_UNARY_EXCLAM = LBP_UNARY_PLUS,
LBP_UNARY_AMP = LBP_UNARY_PLUS,
LBP_UNARY_AMP_MUT = LBP_UNARY_PLUS,
LBP_AS = 65,
LBP_MUL = 60,
LBP_DIV = LBP_MUL,
LBP_MOD = LBP_MUL,
LBP_PLUS = 55,
LBP_MINUS = LBP_PLUS,
LBP_L_SHIFT = 50,
LBP_R_SHIFT = LBP_L_SHIFT,
LBP_AMP = 45,
LBP_CARET = 40,
LBP_PIPE = 35,
LBP_EQUAL = 30,
LBP_NOT_EQUAL = LBP_EQUAL,
LBP_SMALLER_THAN = LBP_EQUAL,
LBP_SMALLER_EQUAL = LBP_EQUAL,
LBP_GREATER_THAN = LBP_EQUAL,
LBP_GREATER_EQUAL = LBP_EQUAL,
LBP_LOGICAL_AND = 25,
LBP_LOGICAL_OR = 20,
LBP_DOT_DOT = 15,
LBP_DOT_DOT_EQ = LBP_DOT_DOT,
// TODO: note all these assig operators are RIGHT associative!
LBP_ASSIG = 10,
LBP_PLUS_ASSIG = LBP_ASSIG,
LBP_MINUS_ASSIG = LBP_ASSIG,
LBP_MULT_ASSIG = LBP_ASSIG,
LBP_DIV_ASSIG = LBP_ASSIG,
LBP_MOD_ASSIG = LBP_ASSIG,
LBP_AMP_ASSIG = LBP_ASSIG,
LBP_PIPE_ASSIG = LBP_ASSIG,
LBP_CARET_ASSIG = LBP_ASSIG,
LBP_L_SHIFT_ASSIG = LBP_ASSIG,
LBP_R_SHIFT_ASSIG = LBP_ASSIG,
// return, break, and closures as lowest priority?
LBP_RETURN = 5,
LBP_BREAK = LBP_RETURN,
LBP_CLOSURE = LBP_RETURN, // unary prefix operators
#if 0
// rust precedences
// used for closures
PREC_CLOSURE = -40,
// used for break, continue, return, and yield
PREC_JUMP = -30,
// used for range (although weird comment in rustc about this)
PREC_RANGE = -10,
// used for binary operators mentioned below - also cast, colon (type),
// assign, assign_op
PREC_BINOP = FROM_ASSOC_OP,
// used for box, address_of, let, unary (again, weird comment on let)
PREC_PREFIX = 50,
// used for await, call, method call, field, index, try,
// inline asm, macro invocation
PREC_POSTFIX = 60,
// used for array, repeat, tuple, literal, path, paren, if,
// while, for, 'loop', match, block, try block, async, struct
PREC_PAREN = 99,
PREC_FORCE_PAREN = 100,
#endif
// lowest priority
LBP_LOWEST = 0
};
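/* For illustration: these left binding powers drive precedence in the Pratt
 * parser. E.g. when parsing `a + b * c`, the right-hand operand of `+` is
 * parsed with a minimum binding power of LBP_PLUS (55); `*` has LBP_MUL (60),
 * which is higher, so `b * c` binds more tightly and the expression groups as
 * `a + (b * c)`. */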
/* Returns whether the token can start a type (i.e. there is a valid type
* beginning with the token). */
inline bool
can_tok_start_type (TokenId id)
{
switch (id)
{
case EXCLAM:
case LEFT_SQUARE:
case LEFT_ANGLE:
case UNDERSCORE:
case ASTERISK:
case AMP:
case LIFETIME:
case IDENTIFIER:
case SUPER:
case SELF:
case SELF_ALIAS:
case CRATE:
case DOLLAR_SIGN:
case SCOPE_RESOLUTION:
case LEFT_PAREN:
case FOR:
case ASYNC:
case CONST:
case UNSAFE:
case EXTERN_TOK:
case FN_TOK:
case IMPL:
case DYN:
case QUESTION_MARK:
return true;
default:
return false;
}
}
/* Returns whether the token id is (or is likely to be) a right angle bracket.
* i.e. '>', '>>', '>=' and '>>=' tokens. */
inline bool
is_right_angle_tok (TokenId id)
{
switch (id)
{
case RIGHT_ANGLE:
case RIGHT_SHIFT:
case GREATER_OR_EQUAL:
case RIGHT_SHIFT_EQ:
return true;
default:
return false;
}
}
/* HACK-y special handling for skipping a right angle token at the end of
* generic arguments.
* Currently, this replaces the "current token" with one that is identical
* except has the leading '>' removed (e.g. '>>' becomes '>'). This is bad
* for several reasons - it modifies the token stream to something that
actually doesn't make syntactic sense, it may not work if the token
* has already been skipped, etc. It was done because it would not
* actually require inserting new items into the token stream (which I
* thought would take more work to not mess up) and because I wasn't sure
* if the "already seen right angle" flag in the parser would work
* correctly.
* Those two other approaches listed are in my opinion actually better
* long-term - insertion is probably best as it reflects syntactically
* what occurs. On the other hand, I need to do a code audit to make sure
* that insertion doesn't mess anything up. So that's a FIXME. */
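/* For illustration: in `Vec<Vec<i32>>` the lexer emits a single '>>' token for
 * the two closing angle brackets, so the inner generic argument list would
 * otherwise have no '>' to consume - hence the splitting below. */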
template <typename ManagedTokenSource>
bool
Parser<ManagedTokenSource>::skip_generics_right_angle ()
{
/* OK, new great idea. Have a lexer method called
* "split_current_token(TokenType newLeft, TokenType newRight)", which is
* called here with whatever arguments are appropriate. That lexer method
* handles "replacing" the current token with the "newLeft" and "inserting"
* the next token with the "newRight" (and creating a location, etc. for it)
*/
/* HACK: special handling for right shift '>>', greater or equal '>=', and
 * right shift assig '>>=' */
const_TokenPtr tok = lexer.peek_token ();
switch (tok->get_id ())
{
case RIGHT_ANGLE:
// this is good - skip token
lexer.skip_token ();
return true;
case RIGHT_SHIFT: {
// new implementation that should be better
lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE);
lexer.skip_token ();
return true;
}
case GREATER_OR_EQUAL: {
// new implementation that should be better
lexer.split_current_token (RIGHT_ANGLE, EQUAL);
lexer.skip_token ();
return true;
}
case RIGHT_SHIFT_EQ: {
// new implementation that should be better
lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL);
lexer.skip_token ();
return true;
}
default:
add_error (Error (tok->get_locus (),
"expected %<>%> at end of generic argument - found %qs",
tok->get_token_description ()));
return false;
}
}
/* Gets left binding power for specified token.
* Not suitable for use at the moment or possibly ever because binding power
* cannot be purely determined from operator token with Rust grammar - e.g.
* method call and field access have
* different left binding powers but the same operator token. */
template <typename ManagedTokenSource>
int
Parser<ManagedTokenSource>::left_binding_power (const_TokenPtr token)
{
// HACK: called with "peek_token()", so lookahead is "peek_token(1)"
switch (token->get_id ())
{
/* TODO: issue here - distinguish between method calls and field access
* somehow? Also would have to distinguish between paths and function
* calls (:: operator), maybe more stuff. */
/* Current plan for tackling LBP - don't do it based on token, use
* lookahead. Or alternatively, only use Pratt parsing for OperatorExpr
* and handle other expressions without it. rustc only considers
* arithmetic, logical/relational, 'as',
* '?=', ranges, colons, and assignment to have operator precedence and
* associativity rules applicable. It then has
* a separate "ExprPrecedence" that also includes binary operators. */
// TODO: handle operator overloading - have a function replace the
// operator?
/*case DOT:
return LBP_DOT;*/
case SCOPE_RESOLUTION:
rust_debug (
"possible error - looked up LBP of scope resolution operator. should "
"be handled elsewhere.");
return LBP_PATH;
/* Resolved by lookahead HACK that should work with current code. If next
* token is identifier and token after that isn't parenthesised expression
* list, it is a field reference. */
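/* e.g. `foo.bar` is treated as a field access (LBP_FIELD_EXPR), whereas
 * `foo.bar()` is treated as a method call (LBP_METHOD_CALL). */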
case DOT:
if (lexer.peek_token (1)->get_id () == IDENTIFIER
&& lexer.peek_token (2)->get_id () != LEFT_PAREN)
{
return LBP_FIELD_EXPR;
}
return LBP_METHOD_CALL;
case LEFT_PAREN:
return LBP_FUNCTION_CALL;
case LEFT_SQUARE:
return LBP_ARRAY_REF;
// postfix question mark (i.e. error propagation expression)
case QUESTION_MARK:
return LBP_QUESTION_MARK;
case AS:
return LBP_AS;
case ASTERISK:
return LBP_MUL;
case DIV:
return LBP_DIV;
case PERCENT:
return LBP_MOD;
case PLUS:
return LBP_PLUS;
case MINUS:
return LBP_MINUS;
case LEFT_SHIFT:
return LBP_L_SHIFT;
case RIGHT_SHIFT:
return LBP_R_SHIFT;
// binary & operator
case AMP:
return LBP_AMP;
// binary ^ operator
case CARET:
return LBP_CARET;
// binary | operator
case PIPE:
return LBP_PIPE;
case EQUAL_EQUAL:
return LBP_EQUAL;
case NOT_EQUAL:
return LBP_NOT_EQUAL;
case RIGHT_ANGLE:
return LBP_GREATER_THAN;
case GREATER_OR_EQUAL:
return LBP_GREATER_EQUAL;
case LEFT_ANGLE:
return LBP_SMALLER_THAN;
case LESS_OR_EQUAL:
return LBP_SMALLER_EQUAL;
case LOGICAL_AND:
return LBP_LOGICAL_AND;
case OR:
return LBP_LOGICAL_OR;
case DOT_DOT:
return LBP_DOT_DOT;
case DOT_DOT_EQ:
return LBP_DOT_DOT_EQ;
case EQUAL:
return LBP_ASSIG;
case PLUS_EQ:
return LBP_PLUS_ASSIG;
case MINUS_EQ:
return LBP_MINUS_ASSIG;
case ASTERISK_EQ:
return LBP_MULT_ASSIG;
case DIV_EQ:
return LBP_DIV_ASSIG;
case PERCENT_EQ:
return LBP_MOD_ASSIG;
case AMP_EQ:
return LBP_AMP_ASSIG;
case PIPE_EQ:
return LBP_PIPE_ASSIG;
case CARET_EQ:
return LBP_CARET_ASSIG;
case LEFT_SHIFT_EQ:
return LBP_L_SHIFT_ASSIG;
case RIGHT_SHIFT_EQ:
return LBP_R_SHIFT_ASSIG;
/* HACK: float literal due to lexer misidentifying a dot then an integer as
* a float */
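/* e.g. in a tuple index expression like `t.0`, the `.0` may have been lexed as
 * a single FLOAT_LITERAL token rather than '.' followed by an integer. */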
case FLOAT_LITERAL:
return LBP_FIELD_EXPR;
// field expr is same as tuple expr in precedence, i imagine
// TODO: is this needed anymore? lexer shouldn't do that anymore
// anything that can't appear in an infix position is given lowest priority
default:
return LBP_LOWEST;
}
}
// Returns true when current token is EOF.
template <typename ManagedTokenSource>
bool
Parser<ManagedTokenSource>::done_end_of_file ()
{
return lexer.peek_token ()->get_id () == END_OF_FILE;
}
// Parses a sequence of items within a module or the implicit top-level module
// in a crate
template <typename ManagedTokenSource>
std::vector<std::unique_ptr<AST::Item>>
Parser<ManagedTokenSource>::parse_items ()
{
std::vector<std::unique_ptr<AST::Item>> items;
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != END_OF_FILE)
{
std::unique_ptr<AST::Item> item = parse_item (false);
if (item == nullptr)
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse item in crate");
add_error (std::move (error));
// TODO: should all items be cleared?
items = std::vector<std::unique_ptr<AST::Item>> ();
break;
}
items.push_back (std::move (item));
t = lexer.peek_token ();
}
return items;
}
// Parses a crate (compilation unit) - entry point
template <typename ManagedTokenSource>
std::unique_ptr<AST::Crate>
Parser<ManagedTokenSource>::parse_crate ()
{
// parse inner attributes
AST::AttrVec inner_attrs = parse_inner_attributes ();
// parse items
std::vector<std::unique_ptr<AST::Item>> items = parse_items ();
// emit all errors
for (const auto &error : error_table)
error.emit ();
return std::unique_ptr<AST::Crate> (
new AST::Crate (std::move (items), std::move (inner_attrs)));
}
// Parse a contiguous block of inner attributes.
template <typename ManagedTokenSource>
AST::AttrVec
Parser<ManagedTokenSource>::parse_inner_attributes ()
{
AST::AttrVec inner_attributes;
// only try to parse it if it starts with "#!" not only "#"
while ((lexer.peek_token ()->get_id () == HASH
&& lexer.peek_token (1)->get_id () == EXCLAM)
|| lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
{
AST::Attribute inner_attr = parse_inner_attribute ();
/* Ensure only valid inner attributes are added to the inner_attributes
* list */
if (!inner_attr.is_empty ())
{
inner_attributes.push_back (std::move (inner_attr));
}
else
{
/* If no more valid inner attributes, break out of loop (only
* contiguous inner attributes parsed). */
break;
}
}
inner_attributes.shrink_to_fit ();
return inner_attributes;
}
// Parse an inner or outer doc comment into a doc attribute.
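/* e.g. the outer doc comment `/// hello` is desugared into the attribute
 * `#[doc = "hello"]`, and the inner doc comment `//! hello` into
 * `#![doc = "hello"]`. */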
template <typename ManagedTokenSource>
AST::Attribute
Parser<ManagedTokenSource>::parse_doc_comment ()
{
const_TokenPtr token = lexer.peek_token ();
Location locus = token->get_locus ();
AST::SimplePathSegment segment ("doc", locus);
std::vector<AST::SimplePathSegment> segments;
segments.push_back (std::move (segment));
AST::SimplePath attr_path (std::move (segments), false, locus);
AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING,
PrimitiveCoreType::CORETYPE_STR, {}, locus);
std::unique_ptr<AST::AttrInput> attr_input (
new AST::AttrInputLiteral (std::move (lit_expr)));
lexer.skip_token ();
return AST::Attribute (std::move (attr_path), std::move (attr_input), locus);
}
// Parse a single inner attribute.
template <typename ManagedTokenSource>
AST::Attribute
Parser<ManagedTokenSource>::parse_inner_attribute ()
{
if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
return parse_doc_comment ();
if (lexer.peek_token ()->get_id () != HASH)
{
Error error (lexer.peek_token ()->get_locus (),
"BUG: token %<#%> is missing, but % "
"was invoked");
add_error (std::move (error));
return AST::Attribute::create_empty ();
}
lexer.skip_token ();
if (lexer.peek_token ()->get_id () != EXCLAM)
{
Error error (lexer.peek_token ()->get_locus (),
"expected % or %<[%> for inner attribute");
add_error (std::move (error));
return AST::Attribute::create_empty ();
}
lexer.skip_token ();
if (!skip_token (LEFT_SQUARE))
return AST::Attribute::create_empty ();
AST::Attribute actual_attribute = parse_attribute_body ();
if (!skip_token (RIGHT_SQUARE))
return AST::Attribute::create_empty ();
return actual_attribute;
}
// Parses the body of an attribute (inner or outer).
template <typename ManagedTokenSource>
AST::Attribute
Parser<ManagedTokenSource>::parse_attribute_body ()
{
Location locus = lexer.peek_token ()->get_locus ();
AST::SimplePath attr_path = parse_simple_path ();
// ensure path is valid to parse attribute input
if (attr_path.is_empty ())
{
Error error (lexer.peek_token ()->get_locus (),
"empty simple path in attribute");
add_error (std::move (error));
// Skip past potential further info in attribute (i.e. attr_input)
skip_after_end_attribute ();
return AST::Attribute::create_empty ();
}
std::unique_ptr<AST::AttrInput> attr_input = parse_attr_input ();
// AttrInput is allowed to be null, so no checks here
return AST::Attribute (std::move (attr_path), std::move (attr_input), locus);
}
/* Determines whether token is a valid simple path segment. This does not
* include scope resolution operators. */
inline bool
is_simple_path_segment (TokenId id)
{
switch (id)
{
case IDENTIFIER:
case SUPER:
case SELF:
case CRATE:
return true;
case DOLLAR_SIGN:
// assume that dollar sign leads to $crate
return true;
default:
return false;
}
}
// Parses a SimplePath AST node, if it exists. Does nothing otherwise.
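/* e.g. `std::option`, `::foo::bar` or `$crate::baz` - one or more segments
 * (identifiers or `super`, `self`, `crate`, `$crate`) separated by `::`, with
 * an optional leading `::`. */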
template <typename ManagedTokenSource>
AST::SimplePath
Parser<ManagedTokenSource>::parse_simple_path ()
{
bool has_opening_scope_resolution = false;
Location locus = Linemap::unknown_location ();
// don't parse anything if not a path upfront
if (!is_simple_path_segment (lexer.peek_token ()->get_id ())
&& !is_simple_path_segment (lexer.peek_token (1)->get_id ()))
return AST::SimplePath::create_empty ();
/* Checks for opening scope resolution (i.e. global scope fully-qualified
* path) */
if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
{
has_opening_scope_resolution = true;
locus = lexer.peek_token ()->get_locus ();
lexer.skip_token ();
}
// Parse single required simple path segment
AST::SimplePathSegment segment = parse_simple_path_segment ();
// get location if not gotten already
if (locus == Linemap::unknown_location ())
locus = segment.get_locus ();
std::vector<AST::SimplePathSegment> segments;
// Return empty vector if first, actually required segment is an error
if (segment.is_error ())
return AST::SimplePath::create_empty ();
segments.push_back (std::move (segment));
// Parse all other simple path segments
while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
{
// Skip scope resolution operator
lexer.skip_token ();
AST::SimplePathSegment new_segment = parse_simple_path_segment ();
// Return path as currently constructed if segment in error state.
if (new_segment.is_error ())
break;
segments.push_back (std::move (new_segment));
}
// DEBUG: check for any empty segments
for (const auto &seg : segments)
{
if (seg.is_error ())
{
rust_debug (
"when parsing simple path, somehow empty path segment was "
"not filtered out. Path begins with '%s'",
segments.at (0).as_string ().c_str ());
}
}
return AST::SimplePath (std::move (segments), has_opening_scope_resolution,
locus);
/* TODO: now that is_simple_path_segment exists, could probably start
* actually making errors upon parse failure of segments and whatever */
}
/* Parses a single SimplePathSegment (does not handle the scope resolution
* operators) */
template <typename ManagedTokenSource>
AST::SimplePathSegment
Parser<ManagedTokenSource>::parse_simple_path_segment ()
{
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case IDENTIFIER:
lexer.skip_token ();
return AST::SimplePathSegment (t->get_str (), t->get_locus ());
case SUPER:
lexer.skip_token ();
return AST::SimplePathSegment ("super", t->get_locus ());
case SELF:
lexer.skip_token ();
return AST::SimplePathSegment ("self", t->get_locus ());
case CRATE:
lexer.skip_token ();
return AST::SimplePathSegment ("crate", t->get_locus ());
case DOLLAR_SIGN:
if (lexer.peek_token (1)->get_id () == CRATE)
{
lexer.skip_token (1);
return AST::SimplePathSegment ("$crate", t->get_locus ());
}
gcc_fallthrough ();
default:
// do nothing but inactivates warning from gcc when compiling
/* could put the rust_error_at thing here but fallthrough (from failing
* $crate condition) isn't completely obvious if it is. */
// test prevent error
return AST::SimplePathSegment::create_error ();
}
gcc_unreachable ();
/*rust_error_at(
t->get_locus(), "invalid token '%s' in simple path segment",
t->get_token_description());*/
// this is not necessarily an error, e.g. end of path
// return AST::SimplePathSegment::create_error();
}
// Parses a PathIdentSegment - an identifier segment of a non-SimplePath path.
template <typename ManagedTokenSource>
AST::PathIdentSegment
Parser<ManagedTokenSource>::parse_path_ident_segment ()
{
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case IDENTIFIER:
lexer.skip_token ();
return AST::PathIdentSegment (t->get_str (), t->get_locus ());
case SUPER:
lexer.skip_token ();
return AST::PathIdentSegment ("super", t->get_locus ());
case SELF:
lexer.skip_token ();
return AST::PathIdentSegment ("self", t->get_locus ());
case SELF_ALIAS:
lexer.skip_token ();
return AST::PathIdentSegment ("Self", t->get_locus ());
case CRATE:
lexer.skip_token ();
return AST::PathIdentSegment ("crate", t->get_locus ());
case DOLLAR_SIGN:
if (lexer.peek_token (1)->get_id () == CRATE)
{
lexer.skip_token (1);
return AST::PathIdentSegment ("$crate", t->get_locus ());
}
gcc_fallthrough ();
default:
/* do nothing but inactivates warning from gcc when compiling
* could put the error_at thing here but fallthrough (from failing $crate
* condition) isn't completely obvious if it is. */
// test prevent error
return AST::PathIdentSegment::create_error ();
}
gcc_unreachable ();
// not necessarily an error
}
// Parses an AttrInput AST node (polymorphic, as AttrInput is abstract)
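/* e.g. in `#[derive(Debug)]` the attr input is the delimited token tree
 * `(Debug)`, in `#[doc = "x"]` it is `= "x"` (a literal), and in `#[inline]`
 * there is no attr input at all. */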
template <typename ManagedTokenSource>
std::unique_ptr<AST::AttrInput>
Parser<ManagedTokenSource>::parse_attr_input ()
{
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case LEFT_PAREN:
case LEFT_SQUARE:
case LEFT_CURLY: {
// must be a delimited token tree, so parse that
std::unique_ptr<AST::AttrInput> input_tree (
new AST::DelimTokenTree (parse_delim_token_tree ()));
// TODO: potential checks on DelimTokenTree before returning
return input_tree;
}
case EQUAL: {
// = LiteralExpr
lexer.skip_token ();
t = lexer.peek_token ();
/* Ensure token is a "literal expression" (literally only a literal
* token of any type) */
if (!t->is_literal ())
{
Error error (
t->get_locus (),
"unknown token %qs in attribute body - literal expected",
t->get_token_description ());
add_error (std::move (error));
skip_after_end_attribute ();
return nullptr;
}
AST::Literal::LitType lit_type = AST::Literal::STRING;
// Crappy mapping of token type to literal type
switch (t->get_id ())
{
case INT_LITERAL:
lit_type = AST::Literal::INT;
break;
case FLOAT_LITERAL:
lit_type = AST::Literal::FLOAT;
break;
case CHAR_LITERAL:
lit_type = AST::Literal::CHAR;
break;
case BYTE_CHAR_LITERAL:
lit_type = AST::Literal::BYTE;
break;
case BYTE_STRING_LITERAL:
lit_type = AST::Literal::BYTE_STRING;
break;
case STRING_LITERAL:
default:
lit_type = AST::Literal::STRING;
break; // TODO: raw string? don't eliminate it from lexer?
}
// create actual LiteralExpr
AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (),
{}, t->get_locus ());
lexer.skip_token ();
std::unique_ptr<AST::AttrInputLiteral> attr_input_lit (
new AST::AttrInputLiteral (std::move (lit_expr)));
// do checks or whatever? none required, really
// FIXME: shouldn't a skip token be required here?
return attr_input_lit;
}
break;
case RIGHT_SQUARE:
// means AttrInput is missing, which is allowed
return nullptr;
default:
add_error (
Error (t->get_locus (),
"unknown token %qs in attribute body - attribute input or "
"none expected",
t->get_token_description ()));
skip_after_end_attribute ();
return nullptr;
}
gcc_unreachable ();
// TODO: find out how to stop gcc error on "no return value"
}
/* Returns true if the token id matches the delimiter type. Note that this only
* operates for END delimiter tokens. */
inline bool
token_id_matches_delims (TokenId token_id, AST::DelimType delim_type)
{
return ((token_id == RIGHT_PAREN && delim_type == AST::PARENS)
|| (token_id == RIGHT_SQUARE && delim_type == AST::SQUARE)
|| (token_id == RIGHT_CURLY && delim_type == AST::CURLY));
}
/* Returns true if the likely result of parsing the next few tokens is a path.
* Not guaranteed, though, especially in the case of syntax errors. */
inline bool
is_likely_path_next (TokenId next_token_id)
{
switch (next_token_id)
{
case IDENTIFIER:
case SUPER:
case SELF:
case SELF_ALIAS:
case CRATE:
// maybe - maybe do extra check. But then requires another TokenId.
case DOLLAR_SIGN:
case SCOPE_RESOLUTION:
return true;
default:
return false;
}
}
// Parses a delimited token tree
template <typename ManagedTokenSource>
AST::DelimTokenTree
Parser<ManagedTokenSource>::parse_delim_token_tree ()
{
const_TokenPtr t = lexer.peek_token ();
lexer.skip_token ();
Location initial_loc = t->get_locus ();
// save delim type to ensure it is reused later
AST::DelimType delim_type = AST::PARENS;
// Map tokens to DelimType
switch (t->get_id ())
{
case LEFT_PAREN:
delim_type = AST::PARENS;
break;
case LEFT_SQUARE:
delim_type = AST::SQUARE;
break;
case LEFT_CURLY:
delim_type = AST::CURLY;
break;
default:
add_error (Error (t->get_locus (),
"unexpected token %qs - expecting delimiters (for a "
"delimited token tree)",
t->get_token_description ()));
return AST::DelimTokenTree::create_empty ();
}
// parse actual token tree vector - 0 or more
std::vector<std::unique_ptr<AST::TokenTree>> token_trees_in_tree;
auto delim_open
= std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
token_trees_in_tree.push_back (std::move (delim_open));
// repeat loop until finding the matching delimiter
t = lexer.peek_token ();
while (!token_id_matches_delims (t->get_id (), delim_type)
&& t->get_id () != END_OF_FILE)
{
std::unique_ptr<AST::TokenTree> tok_tree = parse_token_tree ();
if (tok_tree == nullptr)
{
// TODO: is this error handling appropriate?
Error error (
t->get_locus (),
"failed to parse token tree in delimited token tree - found %qs",
t->get_token_description ());
add_error (std::move (error));
return AST::DelimTokenTree::create_empty ();
}
token_trees_in_tree.push_back (std::move (tok_tree));
// lexer.skip_token();
t = lexer.peek_token ();
}
auto delim_close
= std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
token_trees_in_tree.push_back (std::move (delim_close));
AST::DelimTokenTree token_tree (delim_type, std::move (token_trees_in_tree),
initial_loc);
// parse end delimiters
t = lexer.peek_token ();
if (token_id_matches_delims (t->get_id (), delim_type))
{
// tokens match opening delimiter, so skip.
lexer.skip_token ();
// DEBUG
rust_debug ("finished parsing new delim token tree - peeked token is now "
"'%s' while t is '%s'",
lexer.peek_token ()->get_token_description (),
t->get_token_description ());
return token_tree;
}
else
{
// tokens don't match opening delimiters, so produce error
Error error (t->get_locus (),
"unexpected token %qs - expecting closing delimiter %qs "
"(for a delimited token tree)",
t->get_token_description (),
(delim_type == AST::PARENS
? ")"
: (delim_type == AST::SQUARE ? "]" : "}")));
add_error (std::move (error));
/* return empty token tree despite possibly parsing valid token tree -
* TODO is this a good idea? */
return AST::DelimTokenTree::create_empty ();
}
}
/* Parses a TokenTree syntactical production. This is either a delimited token
* tree or a non-delimiter token. */
template <typename ManagedTokenSource>
std::unique_ptr<AST::TokenTree>
Parser<ManagedTokenSource>::parse_token_tree ()
{
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case LEFT_PAREN:
case LEFT_SQUARE:
case LEFT_CURLY:
// Parse delimited token tree
// TODO: use move rather than copy constructor
return std::unique_ptr<AST::TokenTree> (
new AST::DelimTokenTree (parse_delim_token_tree ()));
case RIGHT_PAREN:
case RIGHT_SQUARE:
case RIGHT_CURLY:
// error - should not be called when this a token
add_error (
Error (t->get_locus (),
"unexpected closing delimiter %qs - token tree requires "
"either paired delimiters or non-delimiter tokens",
t->get_token_description ()));
lexer.skip_token ();
return nullptr;
default:
// parse token itself as TokenTree
lexer.skip_token ();
return std::unique_ptr<AST::TokenTree> (new AST::Token (std::move (t)));
}
}
// Parses a single item
template <typename ManagedTokenSource>
std::unique_ptr<AST::Item>
Parser<ManagedTokenSource>::parse_item (bool called_from_statement)
{
// has a "called_from_statement" parameter for better error message handling
// parse outer attributes for item
AST::AttrVec outer_attrs = parse_outer_attributes ();
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case END_OF_FILE:
// not necessarily an error, unless we just read outer
// attributes which needs to be attached
if (!outer_attrs.empty ())
{
Rust::AST::Attribute attr = outer_attrs.back ();
Error error (attr.get_locus (),
"expected item after outer attribute or doc comment");
add_error (std::move (error));
}
return nullptr;
case PUB:
case MOD:
case EXTERN_TOK:
case USE:
case FN_TOK:
case TYPE:
case STRUCT_TOK:
case ENUM_TOK:
case CONST:
case STATIC_TOK:
case TRAIT:
case IMPL:
case MACRO:
/* TODO: implement union keyword but not really because of
 * context-dependence; a crappy hack to parse a union is written below to
 * separate it from the good code. */
// case UNION:
case UNSAFE: // maybe - unsafe traits are a thing
// if any of these (should be all possible VisItem prefixes), parse a
// VisItem
return parse_vis_item (std::move (outer_attrs));
break;
case SUPER:
case SELF:
case CRATE:
case DOLLAR_SIGN:
// almost certainly macro invocation semi
return parse_macro_invocation_semi (std::move (outer_attrs));
break;
// crappy hack to do union "keyword"
case IDENTIFIER:
// TODO: ensure std::string and literal comparison works
if (t->get_str () == "union"
&& lexer.peek_token (1)->get_id () == IDENTIFIER)
{
return parse_vis_item (std::move (outer_attrs));
// or should this go straight to parsing union?
}
else if (t->get_str () == "macro_rules")
{
// macro_rules! macro item
return parse_macro_rules_def (std::move (outer_attrs));
}
else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION
|| lexer.peek_token (1)->get_id () == EXCLAM)
{
/* path (probably) or macro invocation, so probably a macro invocation
* semi */
return parse_macro_invocation_semi (std::move (outer_attrs));
}
gcc_fallthrough ();
default:
// otherwise unrecognised
add_error (Error (t->get_locus (),
"unrecognised token %qs for start of %s",
t->get_token_description (),
called_from_statement ? "statement" : "item"));
// skip somewhere?
return nullptr;
break;
}
}
// Parses a contiguous block of outer attributes.
template <typename ManagedTokenSource>
AST::AttrVec
Parser<ManagedTokenSource>::parse_outer_attributes ()
{
AST::AttrVec outer_attributes;
while (lexer.peek_token ()->get_id ()
== HASH /* Can also be #!, which catches errors. */
|| lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT
|| lexer.peek_token ()->get_id ()
== INNER_DOC_COMMENT) /* For error handling. */
{
AST::Attribute outer_attr = parse_outer_attribute ();
/* Ensure only valid outer attributes are added to the outer_attributes
* list */
if (!outer_attr.is_empty ())
{
outer_attributes.push_back (std::move (outer_attr));
}
else
{
/* If no more valid outer attributes, break out of loop (only
* contiguous outer attributes parsed). */
break;
}
}
outer_attributes.shrink_to_fit ();
return outer_attributes;
/* TODO: this shares basically all code with parse_inner_attributes except
* function call - find way of making it more modular? function pointer? */
}
// Parse a single outer attribute.
template <typename ManagedTokenSource>
AST::Attribute
Parser<ManagedTokenSource>::parse_outer_attribute ()
{
if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT)
return parse_doc_comment ();
if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
{
Error error (
lexer.peek_token ()->get_locus (),
"inner doc (%/!%> or %*!%>) only allowed at start of item "
"and before any outer attribute or doc (%<#[%>, %//%> or %**%>)");
add_error (std::move (error));
lexer.skip_token ();
return AST::Attribute::create_empty ();
}
/* OuterAttribute -> '#' '[' Attr ']' */
if (lexer.peek_token ()->get_id () != HASH)
return AST::Attribute::create_empty ();
lexer.skip_token ();
TokenId id = lexer.peek_token ()->get_id ();
if (id != LEFT_SQUARE)
{
if (id == EXCLAM)
{
// this is inner attribute syntax, so throw error
// inner attributes were either already parsed or not allowed here.
Error error (
lexer.peek_token ()->get_locus (),
"token % found, indicating inner attribute definition. Inner "
"attributes are not possible at this location");
add_error (std::move (error));
}
return AST::Attribute::create_empty ();
}
lexer.skip_token ();
AST::Attribute actual_attribute = parse_attribute_body ();
if (lexer.peek_token ()->get_id () != RIGHT_SQUARE)
return AST::Attribute::create_empty ();
lexer.skip_token ();
return actual_attribute;
}
// Parses a VisItem (item that can have non-default visibility).
template <typename ManagedTokenSource>
std::unique_ptr<AST::VisItem>
Parser<ManagedTokenSource>::parse_vis_item (AST::AttrVec outer_attrs)
{
// parse visibility, which may or may not exist
AST::Visibility vis = parse_visibility ();
// select VisItem to create depending on keyword
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case MOD:
return parse_module (std::move (vis), std::move (outer_attrs));
case EXTERN_TOK:
// lookahead to resolve syntactical production
t = lexer.peek_token (1);
switch (t->get_id ())
{
case CRATE:
return parse_extern_crate (std::move (vis), std::move (outer_attrs));
case FN_TOK: // extern function
return parse_function (std::move (vis), std::move (outer_attrs));
case LEFT_CURLY: // extern block
return parse_extern_block (std::move (vis), std::move (outer_attrs));
case STRING_LITERAL: // for specifying extern ABI
// could be extern block or extern function, so more lookahead
t = lexer.peek_token (2);
switch (t->get_id ())
{
case FN_TOK:
return parse_function (std::move (vis), std::move (outer_attrs));
case LEFT_CURLY:
return parse_extern_block (std::move (vis),
std::move (outer_attrs));
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of extern production",
t->get_token_description ()));
lexer.skip_token (2); // TODO: is this right thing to do?
return nullptr;
}
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of extern production",
t->get_token_description ()));
lexer.skip_token (1); // TODO: is this right thing to do?
return nullptr;
}
case USE:
return parse_use_decl (std::move (vis), std::move (outer_attrs));
case FN_TOK:
return parse_function (std::move (vis), std::move (outer_attrs));
case TYPE:
return parse_type_alias (std::move (vis), std::move (outer_attrs));
case STRUCT_TOK:
return parse_struct (std::move (vis), std::move (outer_attrs));
case ENUM_TOK:
return parse_enum (std::move (vis), std::move (outer_attrs));
// TODO: implement union keyword but not really because of
// context-dependence
// case UNION:
// crappy hack to do union "keyword"
case IDENTIFIER:
if (t->get_str () == "union"
&& lexer.peek_token (1)->get_id () == IDENTIFIER)
{
return parse_union (std::move (vis), std::move (outer_attrs));
// or should item switch go straight to parsing union?
}
else
{
break;
}
case CONST:
// lookahead to resolve syntactical production
t = lexer.peek_token (1);
switch (t->get_id ())
{
case IDENTIFIER:
case UNDERSCORE:
return parse_const_item (std::move (vis), std::move (outer_attrs));
case UNSAFE:
case EXTERN_TOK:
case FN_TOK:
return parse_function (std::move (vis), std::move (outer_attrs));
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of const production",
t->get_token_description ()));
lexer.skip_token (1); // TODO: is this right thing to do?
return nullptr;
}
case STATIC_TOK:
return parse_static_item (std::move (vis), std::move (outer_attrs));
case TRAIT:
return parse_trait (std::move (vis), std::move (outer_attrs));
case IMPL:
return parse_impl (std::move (vis), std::move (outer_attrs));
case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls),
// lookahead to resolve syntactical production
t = lexer.peek_token (1);
switch (t->get_id ())
{
case TRAIT:
return parse_trait (std::move (vis), std::move (outer_attrs));
case EXTERN_TOK:
case FN_TOK:
return parse_function (std::move (vis), std::move (outer_attrs));
case IMPL:
return parse_impl (std::move (vis), std::move (outer_attrs));
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of unsafe production",
t->get_token_description ()));
lexer.skip_token (1); // TODO: is this right thing to do?
return nullptr;
}
case MACRO:
return parse_decl_macro_def (std::move (vis), std::move (outer_attrs));
default:
// otherwise vis item clearly doesn't exist, which is not an error
// has a catch-all post-switch return to allow other breaks to occur
break;
}
return nullptr;
}
// Parses a macro rules definition syntax extension whatever thing.
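/* Illustrative input:
 *   macro_rules! add_one { ($e:expr) => { $e + 1 }; }
 * i.e. the `macro_rules` identifier, '!', the macro name, and a delimited body
 * of semicolon-separated rules. */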
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroRulesDefinition>
Parser<ManagedTokenSource>::parse_macro_rules_def (AST::AttrVec outer_attrs)
{
// ensure that first token is identifier saying "macro_rules"
const_TokenPtr t = lexer.peek_token ();
if (t->get_id () != IDENTIFIER || t->get_str () != "macro_rules")
{
Error error (
t->get_locus (),
"macro rules definition does not start with %");
add_error (std::move (error));
// skip after somewhere?
return nullptr;
}
lexer.skip_token ();
Location macro_locus = t->get_locus ();
if (!skip_token (EXCLAM))
{
// skip after somewhere?
return nullptr;
}
// parse macro name
const_TokenPtr ident_tok = expect_token (IDENTIFIER);
if (ident_tok == nullptr)
{
return nullptr;
}
Identifier rule_name = ident_tok->get_str ();
// DEBUG
rust_debug ("in macro rules def, about to parse parens.");
// save delim type to ensure it is reused later
AST::DelimType delim_type = AST::PARENS;
// Map tokens to DelimType
t = lexer.peek_token ();
switch (t->get_id ())
{
case LEFT_PAREN:
delim_type = AST::PARENS;
break;
case LEFT_SQUARE:
delim_type = AST::SQUARE;
break;
case LEFT_CURLY:
delim_type = AST::CURLY;
break;
default:
add_error (Error (t->get_locus (),
"unexpected token %qs - expecting delimiters (for a "
"macro rules definition)",
t->get_token_description ()));
return nullptr;
}
lexer.skip_token ();
// parse actual macro rules
std::vector<AST::MacroRule> macro_rules;
// must be at least one macro rule, so parse it
AST::MacroRule initial_rule = parse_macro_rule ();
if (initial_rule.is_error ())
{
Error error (lexer.peek_token ()->get_locus (),
"required first macro rule in macro rules definition "
"could not be parsed");
add_error (std::move (error));
// skip after somewhere?
return nullptr;
}
macro_rules.push_back (std::move (initial_rule));
// DEBUG
rust_debug ("successfully pushed back initial macro rule");
t = lexer.peek_token ();
// parse macro rules
while (t->get_id () == SEMICOLON)
{
// skip semicolon
lexer.skip_token ();
// don't parse if end of macro rules
if (token_id_matches_delims (lexer.peek_token ()->get_id (), delim_type))
{
// DEBUG
rust_debug (
"broke out of parsing macro rules loop due to finding delim");
break;
}
// try to parse next rule
AST::MacroRule rule = parse_macro_rule ();
if (rule.is_error ())
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse macro rule in macro rules definition");
add_error (std::move (error));
return nullptr;
}
macro_rules.push_back (std::move (rule));
// DEBUG
rust_debug ("successfully pushed back another macro rule");
t = lexer.peek_token ();
}
// parse end delimiters
t = lexer.peek_token ();
if (token_id_matches_delims (t->get_id (), delim_type))
{
// tokens match opening delimiter, so skip.
lexer.skip_token ();
if (delim_type != AST::CURLY)
{
// skip semicolon at end of non-curly macro definitions
if (!skip_token (SEMICOLON))
{
// as this is the end, allow recovery (probably) - may change
return std::unique_ptr<AST::MacroRulesDefinition> (
AST::MacroRulesDefinition::mbe (
std::move (rule_name), delim_type, std::move (macro_rules),
std::move (outer_attrs), macro_locus));
}
}
return std::unique_ptr<AST::MacroRulesDefinition> (
AST::MacroRulesDefinition::mbe (std::move (rule_name), delim_type,
std::move (macro_rules),
std::move (outer_attrs), macro_locus));
}
else
{
// tokens don't match opening delimiters, so produce error
Error error (t->get_locus (),
"unexpected token %qs - expecting closing delimiter %qs "
"(for a macro rules definition)",
t->get_token_description (),
(delim_type == AST::PARENS
? ")"
: (delim_type == AST::SQUARE ? "]" : "}")));
add_error (std::move (error));
/* return empty macro definition despite possibly parsing mostly valid one
* - TODO is this a good idea? */
return nullptr;
}
}
// Parses a declarative macro 2.0 definition.
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroRulesDefinition>
Parser<ManagedTokenSource>::parse_decl_macro_def (AST::Visibility vis,
AST::AttrVec outer_attrs)
{
// ensure that first token is identifier saying "macro"
const_TokenPtr t = lexer.peek_token ();
if (t->get_id () != MACRO)
{
Error error (
t->get_locus (),
"declarative macro definition does not start with %");
add_error (std::move (error));
// skip after somewhere?
return nullptr;
}
lexer.skip_token ();
Location macro_locus = t->get_locus ();
// parse macro name
const_TokenPtr ident_tok = expect_token (IDENTIFIER);
if (ident_tok == nullptr)
{
return nullptr;
}
Identifier rule_name = ident_tok->get_str ();
t = lexer.peek_token ();
if (t->get_id () == LEFT_PAREN)
{
// single definition of a macro rule
// e.g. `macro foo($e:expr) {}`
// parse macro matcher
Location locus = lexer.peek_token ()->get_locus ();
AST::MacroMatcher matcher = parse_macro_matcher ();
if (matcher.is_error ())
return nullptr;
// check delimiter of macro matcher
if (matcher.get_delim_type () != AST::DelimType::PARENS)
{
Error error (locus, "only parentheses can be used for a macro "
"matcher in declarative macro definition");
add_error (std::move (error));
return nullptr;
}
Location transcriber_loc = lexer.peek_token ()->get_locus ();
AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree ();
AST::MacroTranscriber transcriber (delim_tok_tree, transcriber_loc);
if (transcriber.get_token_tree ().get_delim_type ()
!= AST::DelimType::CURLY)
{
Error error (transcriber_loc,
"only braces can be used for a macro transcriber "
"in declarative macro definition");
add_error (std::move (error));
return nullptr;
}
AST::MacroRule macro_rule
= AST::MacroRule (std::move (matcher), std::move (transcriber), locus);
std::vector<AST::MacroRule> macro_rules;
macro_rules.push_back (macro_rule);
return std::unique_ptr<AST::MacroRulesDefinition> (
AST::MacroRulesDefinition::decl_macro (std::move (rule_name),
macro_rules,
std::move (outer_attrs),
macro_locus, vis));
}
else if (t->get_id () == LEFT_CURLY)
{
// multiple definitions of macro rule separated by comma
// e.g. `macro foo { () => {}, ($e:expr) => {}, }`
// parse left curly
const_TokenPtr left_curly = expect_token (LEFT_CURLY);
if (left_curly == nullptr)
{
return nullptr;
}
// parse actual macro rules
std::vector<AST::MacroRule> macro_rules;
// must be at least one macro rule, so parse it
AST::MacroRule initial_rule = parse_macro_rule ();
if (initial_rule.is_error ())
{
Error error (
lexer.peek_token ()->get_locus (),
"required first macro rule in declarative macro definition "
"could not be parsed");
add_error (std::move (error));
// skip after somewhere?
return nullptr;
}
macro_rules.push_back (std::move (initial_rule));
t = lexer.peek_token ();
// parse macro rules
while (t->get_id () == COMMA)
{
// skip comma
lexer.skip_token ();
// don't parse if end of macro rules
if (token_id_matches_delims (lexer.peek_token ()->get_id (),
AST::CURLY))
{
break;
}
// try to parse next rule
AST::MacroRule rule = parse_macro_rule ();
if (rule.is_error ())
{
Error error (
lexer.peek_token ()->get_locus (),
"failed to parse macro rule in declarative macro definition");
add_error (std::move (error));
return nullptr;
}
macro_rules.push_back (std::move (rule));
t = lexer.peek_token ();
}
// parse right curly
const_TokenPtr right_curly = expect_token (RIGHT_CURLY);
if (right_curly == nullptr)
{
return nullptr;
}
return std::unique_ptr<AST::MacroRulesDefinition> (
AST::MacroRulesDefinition::decl_macro (std::move (rule_name),
std::move (macro_rules),
std::move (outer_attrs),
macro_locus, vis));
}
else
{
add_error (Error (t->get_locus (),
"unexpected token %qs - expecting delimiters "
"(for a declarative macro definiton)",
t->get_token_description ()));
return nullptr;
}
}
// Parses a semi-coloned (except for full block) macro invocation item.
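/* e.g. `foo!(a, b);` or `foo![a, b];` (trailing semicolon required), or
 * `foo! { a, b }` (no trailing semicolon needed for curly braces) - `foo` here
 * is just a placeholder macro name. */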
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroInvocation>
Parser<ManagedTokenSource>::parse_macro_invocation_semi (
AST::AttrVec outer_attrs)
{
Location macro_locus = lexer.peek_token ()->get_locus ();
AST::SimplePath path = parse_simple_path ();
if (!skip_token (EXCLAM))
{
// skip after somewhere?
return nullptr;
}
// save delim type to ensure it is reused later
AST::DelimType delim_type = AST::PARENS;
// Map tokens to DelimType
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case LEFT_PAREN:
delim_type = AST::PARENS;
break;
case LEFT_SQUARE:
delim_type = AST::SQUARE;
break;
case LEFT_CURLY:
delim_type = AST::CURLY;
break;
default:
add_error (Error (t->get_locus (),
"unexpected token %qs - expecting delimiters (for a "
"macro invocation semi body)",
t->get_token_description ()));
return nullptr;
}
Location tok_tree_locus = t->get_locus ();
lexer.skip_token ();
// parse actual token trees
std::vector<std::unique_ptr<AST::TokenTree>> token_trees;
auto delim_open
= std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
token_trees.push_back (std::move (delim_open));
t = lexer.peek_token ();
// parse token trees until the initial delimiter token is found again
while (!token_id_matches_delims (t->get_id (), delim_type))
{
std::unique_ptr<AST::TokenTree> tree = parse_token_tree ();
if (tree == nullptr)
{
Error error (t->get_locus (),
"failed to parse token tree for macro invocation semi "
"- found %qs",
t->get_token_description ());
add_error (std::move (error));
return nullptr;
}
token_trees.push_back (std::move (tree));
t = lexer.peek_token ();
}
auto delim_close
= std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
token_trees.push_back (std::move (delim_close));
AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees),
tok_tree_locus);
AST::MacroInvocData invoc_data (std::move (path), std::move (delim_tok_tree));
// parse end delimiters
t = lexer.peek_token ();
if (token_id_matches_delims (t->get_id (), delim_type))
{
// tokens match opening delimiter, so skip.
lexer.skip_token ();
if (delim_type != AST::CURLY)
{
// skip semicolon at end of non-curly macro invocation semis
if (!skip_token (SEMICOLON))
{
// as this is the end, allow recovery (probably) - may change
return AST::MacroInvocation::Regular (std::move (invoc_data),
std::move (outer_attrs),
macro_locus, true);
}
}
// DEBUG:
rust_debug ("skipped token is '%s', next token (current peek) is '%s'",
t->get_token_description (),
lexer.peek_token ()->get_token_description ());
return AST::MacroInvocation::Regular (std::move (invoc_data),
std::move (outer_attrs),
macro_locus, true);
}
else
{
// tokens don't match opening delimiters, so produce error
Error error (t->get_locus (),
"unexpected token %qs - expecting closing delimiter %qs "
"(for a macro invocation semi)",
t->get_token_description (),
(delim_type == AST::PARENS
? ")"
: (delim_type == AST::SQUARE ? "]" : "}")));
add_error (std::move (error));
/* return empty macro invocation despite possibly parsing mostly valid one
* - TODO is this a good idea? */
return nullptr;
}
}
// Parses a non-semicoloned macro invocation (i.e. as pattern or expression).
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroInvocation>
Parser<ManagedTokenSource>::parse_macro_invocation (AST::AttrVec outer_attrs)
{
// parse macro path
AST::SimplePath macro_path = parse_simple_path ();
if (macro_path.is_empty ())
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse macro invocation path");
add_error (std::move (error));
// skip?
return nullptr;
}
if (!skip_token (EXCLAM))
{
// skip after somewhere?
return nullptr;
}
// parse internal delim token tree
AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree ();
Location macro_locus = macro_path.get_locus ();
return AST::MacroInvocation::Regular (
AST::MacroInvocData (std::move (macro_path), std::move (delim_tok_tree)),
std::move (outer_attrs), macro_locus);
}
// Parses a macro rule definition - does not parse semicolons.
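/* A rule has the form `MacroMatcher => MacroTranscriber`, e.g.
 * `($e:expr) => { $e + 1 }` - the matcher is parsed first, then the `=>`
 * arrow, then the transcriber (itself just a delimited token tree). */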
template <typename ManagedTokenSource>
AST::MacroRule
Parser<ManagedTokenSource>::parse_macro_rule ()
{
Location locus = lexer.peek_token ()->get_locus ();
// parse macro matcher
AST::MacroMatcher matcher = parse_macro_matcher ();
if (matcher.is_error ())
return AST::MacroRule::create_error (locus);
if (!skip_token (MATCH_ARROW))
{
// skip after somewhere?
return AST::MacroRule::create_error (locus);
}
// parse transcriber (this is just a delim token tree)
Location token_tree_loc = lexer.peek_token ()->get_locus ();
AST::MacroTranscriber transcriber (parse_delim_token_tree (), token_tree_loc);
return AST::MacroRule (std::move (matcher), std::move (transcriber), locus);
}
// Parses a macro matcher (part of a macro rule definition).
template <typename ManagedTokenSource>
AST::MacroMatcher
Parser<ManagedTokenSource>::parse_macro_matcher ()
{
// save delim type to ensure it is reused later
AST::DelimType delim_type = AST::PARENS;
// DEBUG
rust_debug ("begun parsing macro matcher");
// Map tokens to DelimType
const_TokenPtr t = lexer.peek_token ();
Location locus = t->get_locus ();
switch (t->get_id ())
{
case LEFT_PAREN:
delim_type = AST::PARENS;
break;
case LEFT_SQUARE:
delim_type = AST::SQUARE;
break;
case LEFT_CURLY:
delim_type = AST::CURLY;
break;
default:
add_error (Error (
t->get_locus (),
"unexpected token %qs - expecting delimiters (for a macro matcher)",
t->get_token_description ()));
return AST::MacroMatcher::create_error (t->get_locus ());
}
lexer.skip_token ();
// parse actual macro matches
std::vector<std::unique_ptr<AST::MacroMatch>> matches;
// Set of possible preceding macro matches to make sure follow-set
// restrictions are respected.
// TODO: Consider using std::reference_wrapper instead of raw pointers?
std::vector<const AST::MacroMatch *> last_matches;
t = lexer.peek_token ();
// parse token trees until the initial delimiter token is found again
while (!token_id_matches_delims (t->get_id (), delim_type))
{
std::unique_ptr<AST::MacroMatch> match = parse_macro_match ();
if (match == nullptr)
{
Error error (
t->get_locus (),
"failed to parse macro match for macro matcher - found %qs",
t->get_token_description ());
add_error (std::move (error));
return AST::MacroMatcher::create_error (t->get_locus ());
}
if (matches.size () > 0)
{
const auto *last_match = matches.back ().get ();
// We want to check if we are dealing with a zeroable repetition
bool zeroable = false;
if (last_match->get_macro_match_type ()
== AST::MacroMatch::MacroMatchType::Repetition)
{
auto repetition
= static_cast<const AST::MacroMatchRepetition *> (last_match);
if (repetition->get_op ()
!= AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE)
zeroable = true;
}
if (!zeroable)
last_matches.clear ();
last_matches.emplace_back (last_match);
for (auto last : last_matches)
if (!is_match_compatible (*last, *match))
return AST::MacroMatcher::create_error (
match->get_match_locus ());
}
matches.push_back (std::move (match));
// DEBUG
rust_debug ("pushed back a match in macro matcher");
t = lexer.peek_token ();
}
// parse end delimiters
t = lexer.peek_token ();
if (token_id_matches_delims (t->get_id (), delim_type))
{
// tokens match opening delimiter, so skip.
lexer.skip_token ();
return AST::MacroMatcher (delim_type, std::move (matches), locus);
}
else
{
// tokens don't match opening delimiters, so produce error
Error error (t->get_locus (),
"unexpected token %qs - expecting closing delimiter %qs "
"(for a macro matcher)",
t->get_token_description (),
(delim_type == AST::PARENS
? ")"
: (delim_type == AST::SQUARE ? "]" : "}")));
add_error (std::move (error));
/* return error macro matcher despite possibly parsing mostly correct one?
* TODO is this the best idea? */
return AST::MacroMatcher::create_error (t->get_locus ());
}
}
// Parses a macro match (syntax match inside a matcher in a macro rule).
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroMatch>
Parser<ManagedTokenSource>::parse_macro_match ()
{
// branch based on token available
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case LEFT_PAREN:
case LEFT_SQUARE:
case LEFT_CURLY: {
// must be macro matcher as delimited
AST::MacroMatcher matcher = parse_macro_matcher ();
if (matcher.is_error ())
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse macro matcher in macro match");
add_error (std::move (error));
return nullptr;
}
return std::unique_ptr<AST::MacroMatch> (
new AST::MacroMatcher (std::move (matcher)));
}
case DOLLAR_SIGN: {
// have to do more lookahead to determine if fragment or repetition
const_TokenPtr t2 = lexer.peek_token (1);
switch (t2->get_id ())
{
case ABSTRACT:
case AS:
case ASYNC:
case BECOME:
case BOX:
case BREAK:
case CONST:
case CONTINUE:
case CRATE:
case DO:
case DYN:
case ELSE:
case ENUM_TOK:
case EXTERN_TOK:
case FALSE_LITERAL:
case FINAL_TOK:
case FN_TOK:
case FOR:
case IF:
case IMPL:
case IN:
case LET:
case LOOP:
case MACRO:
case MATCH_TOK:
case MOD:
case MOVE:
case MUT:
case OVERRIDE_TOK:
case PRIV:
case PUB:
case REF:
case RETURN_TOK:
case SELF_ALIAS:
case SELF:
case STATIC_TOK:
case STRUCT_TOK:
case SUPER:
case TRAIT:
case TRUE_LITERAL:
case TRY:
case TYPE:
case TYPEOF:
case UNSAFE:
case UNSIZED:
case USE:
case VIRTUAL:
case WHERE:
case WHILE:
case YIELD:
case IDENTIFIER:
// macro fragment
return parse_macro_match_fragment ();
case LEFT_PAREN:
// macro repetition
return parse_macro_match_repetition ();
default:
// error: unrecognised
add_error (
Error (t2->get_locus (),
"unrecognised token combination %<$%s%> at start of "
"macro match - did you mean %<$identifier%> or %<$(%>?",
t2->get_token_description ()));
// skip somewhere?
return nullptr;
}
}
case RIGHT_PAREN:
case RIGHT_SQUARE:
case RIGHT_CURLY:
// not allowed
add_error (Error (
t->get_locus (),
"closing delimiters like %qs are not allowed at the start of a macro "
"match",
t->get_token_description ()));
// skip somewhere?
return nullptr;
default:
// just the token
lexer.skip_token ();
return std::unique_ptr<AST::MacroMatch> (new AST::Token (std::move (t)));
}
}
// Parses a fragment macro match.
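/* e.g. `$e:expr` - a '$', an identifier (or keyword), a ':' and a fragment
 * specifier such as `expr`, `ident`, `ty` or `tt`. */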
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroMatchFragment>
Parser<ManagedTokenSource>::parse_macro_match_fragment ()
{
Location fragment_locus = lexer.peek_token ()->get_locus ();
skip_token (DOLLAR_SIGN);
Identifier ident = "";
auto identifier = lexer.peek_token ();
if (identifier->has_str ())
ident = identifier->get_str ();
else
ident = std::string (token_id_to_str (identifier->get_id ()));
if (ident.empty ())
{
Error error (lexer.peek_token ()->get_locus (),
"missing identifier in macro match fragment");
add_error (std::move (error));
return nullptr;
}
skip_token (identifier->get_id ());
if (!skip_token (COLON))
{
// skip after somewhere?
return nullptr;
}
// get MacroFragSpec for macro
const_TokenPtr t = expect_token (IDENTIFIER);
if (t == nullptr)
return nullptr;
AST::MacroFragSpec frag
= AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ());
if (frag.is_error ())
{
Error error (t->get_locus (),
"invalid fragment specifier %qs in fragment macro match",
t->get_str ().c_str ());
add_error (std::move (error));
return nullptr;
}
return std::unique_ptr<AST::MacroMatchFragment> (
new AST::MacroMatchFragment (std::move (ident), frag, fragment_locus));
}
// Parses a repetition macro match.
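/* e.g. `$( $elem:expr ),*` - a parenthesised group of macro matches, an
 * optional separator token (here ','), and a repetition operator ('*', '+' or
 * '?'). */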
template <typename ManagedTokenSource>
std::unique_ptr<AST::MacroMatchRepetition>
Parser<ManagedTokenSource>::parse_macro_match_repetition ()
{
skip_token (DOLLAR_SIGN);
skip_token (LEFT_PAREN);
std::vector<std::unique_ptr<AST::MacroMatch>> matches;
// parse required first macro match
std::unique_ptr<AST::MacroMatch> initial_match = parse_macro_match ();
if (initial_match == nullptr)
{
Error error (
lexer.peek_token ()->get_locus (),
"could not parse required first macro match in macro match repetition");
add_error (std::move (error));
// skip after somewhere?
return nullptr;
}
matches.push_back (std::move (initial_match));
// parse optional later macro matches
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != RIGHT_PAREN)
{
std::unique_ptr<AST::MacroMatch> match = parse_macro_match ();
if (match == nullptr)
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse macro match in macro match repetition");
add_error (std::move (error));
return nullptr;
}
matches.push_back (std::move (match));
t = lexer.peek_token ();
}
if (!skip_token (RIGHT_PAREN))
{
// skip after somewhere?
return nullptr;
}
t = lexer.peek_token ();
// see if separator token exists
std::unique_ptr<AST::Token> separator = nullptr;
switch (t->get_id ())
{
// repetition operators
case ASTERISK:
case PLUS:
case QUESTION_MARK:
// delimiters
case LEFT_PAREN:
case LEFT_CURLY:
case LEFT_SQUARE:
case RIGHT_PAREN:
case RIGHT_CURLY:
case RIGHT_SQUARE:
// separator does not exist, so still null and don't skip token
break;
default:
// separator does exist
separator = std::unique_ptr<AST::Token> (new AST::Token (std::move (t)));
lexer.skip_token ();
break;
}
// parse repetition operator
t = lexer.peek_token ();
AST::MacroMatchRepetition::MacroRepOp op = AST::MacroMatchRepetition::NONE;
switch (t->get_id ())
{
case ASTERISK:
op = AST::MacroMatchRepetition::ANY;
lexer.skip_token ();
break;
case PLUS:
op = AST::MacroMatchRepetition::ONE_OR_MORE;
lexer.skip_token ();
break;
case QUESTION_MARK:
op = AST::MacroMatchRepetition::ZERO_OR_ONE;
lexer.skip_token ();
break;
default:
add_error (
Error (t->get_locus (),
"expected macro repetition operator (%<*%>, %<+%>, or %%>) in "
"macro match - found %qs",
t->get_token_description ()));
// skip after somewhere?
return nullptr;
}
return std::unique_ptr<AST::MacroMatchRepetition> (
new AST::MacroMatchRepetition (std::move (matches), op,
std::move (separator), t->get_locus ()));
}
/* Parses a visibility syntactical production (i.e. creating a non-default
* visibility) */
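/* e.g. `pub`, `pub(crate)`, `pub(self)`, `pub(super)` or `pub(in some::path)`;
 * anything else yields the default (private) visibility. */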
template <typename ManagedTokenSource>
AST::Visibility
Parser<ManagedTokenSource>::parse_visibility ()
{
// check for no visibility
if (lexer.peek_token ()->get_id () != PUB)
{
return AST::Visibility::create_private ();
}
auto vis_loc = lexer.peek_token ()->get_locus ();
lexer.skip_token ();
// create simple pub visibility if no parentheses
if (lexer.peek_token ()->get_id () != LEFT_PAREN)
{
return AST::Visibility::create_public (vis_loc);
// or whatever
}
lexer.skip_token ();
const_TokenPtr t = lexer.peek_token ();
auto path_loc = t->get_locus ();
switch (t->get_id ())
{
case CRATE:
lexer.skip_token ();
skip_token (RIGHT_PAREN);
return AST::Visibility::create_crate (path_loc, vis_loc);
case SELF:
lexer.skip_token ();
skip_token (RIGHT_PAREN);
return AST::Visibility::create_self (path_loc, vis_loc);
case SUPER:
lexer.skip_token ();
skip_token (RIGHT_PAREN);
return AST::Visibility::create_super (path_loc, vis_loc);
case IN: {
lexer.skip_token ();
// parse the "in" path as well
AST::SimplePath path = parse_simple_path ();
if (path.is_empty ())
{
Error error (lexer.peek_token ()->get_locus (),
"missing path in pub(in path) visibility");
add_error (std::move (error));
// skip after somewhere?
return AST::Visibility::create_error ();
}
skip_token (RIGHT_PAREN);
return AST::Visibility::create_in_path (std::move (path), vis_loc);
}
default:
add_error (Error (t->get_locus (), "unexpected token %qs in visibility",
t->get_token_description ()));
lexer.skip_token ();
return AST::Visibility::create_error ();
}
}
// Parses a module - either a bodied module or a module defined in another file.
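/* e.g. `mod foo;` (module whose contents live in another file) or
 * `mod foo { ... }` (inline module with a body). */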
template <typename ManagedTokenSource>
std::unique_ptr<AST::Module>
Parser<ManagedTokenSource>::parse_module (AST::Visibility vis,
AST::AttrVec outer_attrs)
{
Location locus = lexer.peek_token ()->get_locus ();
skip_token (MOD);
const_TokenPtr module_name = expect_token (IDENTIFIER);
if (module_name == nullptr)
{
return nullptr;
}
Identifier name = module_name->get_str ();
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case SEMICOLON:
lexer.skip_token ();
// Construct an external module
return std::unique_ptr<AST::Module> (
new AST::Module (std::move (name), std::move (vis),
std::move (outer_attrs), locus, lexer.get_filename (),
inline_module_stack));
case LEFT_CURLY: {
lexer.skip_token ();
// parse inner attributes
AST::AttrVec inner_attrs = parse_inner_attributes ();
std::string module_path_name
= extract_module_path (inner_attrs, outer_attrs, name);
InlineModuleStackScope scope (*this, std::move (module_path_name));
// parse items
std::vector<std::unique_ptr<AST::Item>> items;
const_TokenPtr tok = lexer.peek_token ();
while (tok->get_id () != RIGHT_CURLY)
{
std::unique_ptr<AST::Item> item = parse_item (false);
if (item == nullptr)
{
Error error (tok->get_locus (),
"failed to parse item in module");
add_error (std::move (error));
return nullptr;
}
items.push_back (std::move (item));
tok = lexer.peek_token ();
}
if (!skip_token (RIGHT_CURLY))
{
// skip somewhere?
return nullptr;
}
return std::unique_ptr<AST::Module> (
new AST::Module (std::move (name), locus, std::move (items),
std::move (vis), std::move (inner_attrs),
std::move (outer_attrs))); // module name?
}
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in module declaration/definition item",
t->get_token_description ()));
lexer.skip_token ();
return nullptr;
}
}
// Parses an extern crate declaration (dependency on external crate)
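// e.g. `extern crate foo;`, `extern crate foo as bar;` or `extern crate self as baz;`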
template <typename ManagedTokenSource>
std::unique_ptr<AST::ExternCrate>
Parser<ManagedTokenSource>::parse_extern_crate (AST::Visibility vis,
AST::AttrVec outer_attrs)
{
Location locus = lexer.peek_token ()->get_locus ();
if (!skip_token (EXTERN_TOK))
{
skip_after_semicolon ();
return nullptr;
}
if (!skip_token (CRATE))
{
skip_after_semicolon ();
return nullptr;
}
/* parse crate reference name - this has its own syntactical rule in the
 * reference but does not seem to be used elsewhere, so it is handled here */
const_TokenPtr crate_name_tok = lexer.peek_token ();
std::string crate_name;
switch (crate_name_tok->get_id ())
{
case IDENTIFIER:
crate_name = crate_name_tok->get_str ();
lexer.skip_token ();
break;
case SELF:
crate_name = "self";
lexer.skip_token ();
break;
default:
add_error (
Error (crate_name_tok->get_locus (),
"expecting crate name (identifier or %), found %qs",
crate_name_tok->get_token_description ()));
skip_after_semicolon ();
return nullptr;
}
// don't parse as clause if it doesn't exist
if (lexer.peek_token ()->get_id () == SEMICOLON)
{
lexer.skip_token ();
return std::unique_ptr<AST::ExternCrate> (
new AST::ExternCrate (std::move (crate_name), std::move (vis),
std::move (outer_attrs), locus));
}
/* parse as clause - this also has its own syntactical rule in the reference
 * and also does not seem to be used elsewhere, so it is handled here too. */
if (!skip_token (AS))
{
skip_after_semicolon ();
return nullptr;
}
const_TokenPtr as_name_tok = lexer.peek_token ();
std::string as_name;
switch (as_name_tok->get_id ())
{
case IDENTIFIER:
as_name = as_name_tok->get_str ();
lexer.skip_token ();
break;
case UNDERSCORE:
as_name = "_";
lexer.skip_token ();
break;
default:
add_error (
Error (as_name_tok->get_locus (),
"expecting as clause name (identifier or %<_%>), found %qs",
as_name_tok->get_token_description ()));
skip_after_semicolon ();
return nullptr;
}
if (!skip_token (SEMICOLON))
{
skip_after_semicolon ();
return nullptr;
}
return std::unique_ptr<AST::ExternCrate> (
new AST::ExternCrate (std::move (crate_name), std::move (vis),
std::move (outer_attrs), locus, std::move (as_name)));
}
// Parses a use declaration.
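// e.g. `use foo::bar;`, `use foo::*;` or `use foo::{bar, baz as qux};`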
template <typename ManagedTokenSource>
std::unique_ptr<AST::UseDeclaration>
Parser<ManagedTokenSource>::parse_use_decl (AST::Visibility vis,
AST::AttrVec outer_attrs)
{
Location locus = lexer.peek_token ()->get_locus ();
if (!skip_token (USE))
{
skip_after_semicolon ();
return nullptr;
}
// parse use tree, which is required
std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
if (use_tree == nullptr)
{
Error error (lexer.peek_token ()->get_locus (),
"could not parse use tree in use declaration");
add_error (std::move (error));
skip_after_semicolon ();
return nullptr;
}
if (!skip_token (SEMICOLON))
{
skip_after_semicolon ();
return nullptr;
}
return std::unique_ptr<AST::UseDeclaration> (
new AST::UseDeclaration (std::move (use_tree), std::move (vis),
std::move (outer_attrs), locus));
}
// Parses a use tree (which can be recursive and is actually a base class).
template <typename ManagedTokenSource>
std::unique_ptr<AST::UseTree>
Parser<ManagedTokenSource>::parse_use_tree ()
{
/* potential syntax definitions in attempt to get algorithm:
* Glob:
* <- SimplePath :: *
* <- :: *
* <- *
* Nested tree thing:
* <- SimplePath :: { COMPLICATED_INNER_TREE_THING }
* <- :: { COMPLICATED_INNER_TREE_THING }
* <- { COMPLICATED_INNER_TREE_THING }
* Rebind thing:
* <- SimplePath as IDENTIFIER
* <- SimplePath as _
* <- SimplePath
*/
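/* concrete examples: `foo::bar::*` (glob), `foo::{bar, baz as qux}` (nested
 * tree), `foo::bar as baz` (rebind), or a plain `foo::bar` path. */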
/* current plan of attack: try to parse a SimplePath first - if that fails,
 * the tree must be a glob or nested tree form; then try to parse %<::%> -
 * if that fails, it is still a glob or nested tree. The next character
 * (%<*%> or %<{%>) then decides between those two. */
/* Thus, parsing smaller parts of use tree may require feeding into function
* via parameters (or could handle all in this single function because other
* use tree types aren't recognised as separate in the spec) */
// TODO: I think this function is too complex, probably should split it
Location locus = lexer.peek_token ()->get_locus ();
// bool has_path = false;
AST::SimplePath path = parse_simple_path ();
if (path.is_empty ())
{
// has no path, so must be glob or nested tree UseTree type
bool is_global = false;
// check for global scope resolution operator
if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
{
lexer.skip_token ();
is_global = true;
}
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case ASTERISK:
// glob UseTree type
lexer.skip_token ();
if (is_global)
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL,
AST::SimplePath::create_empty (), locus));
else
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH,
AST::SimplePath::create_empty (), locus));
case LEFT_CURLY: {
// nested tree UseTree type
lexer.skip_token ();
std::vector<std::unique_ptr<AST::UseTree>> use_trees;
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != RIGHT_CURLY)
{
std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
if (use_tree == nullptr)
{
break;
}
use_trees.push_back (std::move (use_tree));
if (lexer.peek_token ()->get_id () != COMMA)
break;
lexer.skip_token ();
t = lexer.peek_token ();
}
// skip end curly delimiter
if (!skip_token (RIGHT_CURLY))
{
// skip after somewhere?
return nullptr;
}
if (is_global)
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeList (AST::UseTreeList::GLOBAL,
AST::SimplePath::create_empty (),
std::move (use_trees), locus));
else
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeList (AST::UseTreeList::NO_PATH,
AST::SimplePath::create_empty (),
std::move (use_trees), locus));
}
case AS:
// this is not allowed
add_error (Error (
t->get_locus (),
"use declaration with rebind % requires a valid simple path - "
"none found"));
skip_after_semicolon ();
return nullptr;
default:
add_error (Error (t->get_locus (),
"unexpected token %qs in use tree with "
"no valid simple path (i.e. list"
" or glob use tree)",
t->get_token_description ()));
skip_after_semicolon ();
return nullptr;
}
}
else
{
/* Due to aforementioned implementation issues, the trailing :: token is
* consumed by the path, so it can not be used as a disambiguator.
* NOPE, not true anymore - TODO what are the consequences of this? */
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case ASTERISK:
// glob UseTree type
lexer.skip_token ();
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
std::move (path), locus));
case LEFT_CURLY: {
// nested tree UseTree type
lexer.skip_token ();
std::vector<std::unique_ptr<AST::UseTree>> use_trees;
// TODO: think of better control structure
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != RIGHT_CURLY)
{
std::unique_ptr<AST::UseTree> use_tree = parse_use_tree ();
if (use_tree == nullptr)
{
break;
}
use_trees.push_back (std::move (use_tree));
if (lexer.peek_token ()->get_id () != COMMA)
break;
lexer.skip_token ();
t = lexer.peek_token ();
}
// skip end curly delimiter
if (!skip_token (RIGHT_CURLY))
{
// skip after somewhere?
return nullptr;
}
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
std::move (path), std::move (use_trees),
locus));
}
case AS: {
// rebind UseTree type
lexer.skip_token ();
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case IDENTIFIER:
// skip lexer token
lexer.skip_token ();
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER,
std::move (path), locus,
t->get_str ()));
case UNDERSCORE:
// skip lexer token
lexer.skip_token ();
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD,
std::move (path), locus, "_"));
default:
add_error (Error (
t->get_locus (),
"unexpected token %qs in use tree with as clause - expected "
"identifier or %<_%>",
t->get_token_description ()));
skip_after_semicolon ();
return nullptr;
}
}
case SEMICOLON:
// rebind UseTree type without rebinding - path only
// don't skip semicolon - handled in parse_use_tree
// lexer.skip_token();
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
locus));
case COMMA:
case RIGHT_CURLY:
// this may occur in recursive calls - assume it is ok and ignore it
return std::unique_ptr<AST::UseTree> (
new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path),
locus));
default:
add_error (Error (t->get_locus (),
"unexpected token %qs in use tree with valid path",
t->get_token_description ()));
// skip_after_semicolon();
return nullptr;
}
}
}
// Parses a function (not a method).
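/* e.g. `const unsafe extern "C" fn foo<T: Clone>(x: T) -> T where T: Default
 * { ... }` - qualifiers, generic params, return type and where clause are all
 * optional */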
template <typename ManagedTokenSource>
std::unique_ptr<AST::Function>
Parser<ManagedTokenSource>::parse_function (AST::Visibility vis,
AST::AttrVec outer_attrs)
{
Location locus = lexer.peek_token ()->get_locus ();
// Get qualifiers for function if they exist
AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();
skip_token (FN_TOK);
// Save function name token
const_TokenPtr function_name_tok = expect_token (IDENTIFIER);
if (function_name_tok == nullptr)
{
skip_after_next_block ();
return nullptr;
}
Identifier function_name = function_name_tok->get_str ();
// parse generic params - if exist
std::vector<std::unique_ptr<AST::GenericParam>> generic_params
= parse_generic_params_in_angles ();
if (!skip_token (LEFT_PAREN))
{
Error error (lexer.peek_token ()->get_locus (),
"function declaration missing opening parentheses before "
"parameter list");
add_error (std::move (error));
skip_after_next_block ();
return nullptr;
}
// parse function parameters (only if next token isn't right paren)
std::vector<AST::FunctionParam> function_params;
if (lexer.peek_token ()->get_id () != RIGHT_PAREN)
function_params
= parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; });
if (!skip_token (RIGHT_PAREN))
{
Error error (lexer.peek_token ()->get_locus (),
"function declaration missing closing parentheses after "
"parameter list");
add_error (std::move (error));
skip_after_next_block ();
return nullptr;
}
// parse function return type - if exists
std::unique_ptr<AST::Type> return_type = parse_function_return_type ();
// parse where clause - if exists
AST::WhereClause where_clause = parse_where_clause ();
// parse block expression
std::unique_ptr<AST::BlockExpr> block_expr = parse_block_expr ();
return std::unique_ptr<AST::Function> (
new AST::Function (std::move (function_name), std::move (qualifiers),
std::move (generic_params), std::move (function_params),
std::move (return_type), std::move (where_clause),
std::move (block_expr), std::move (vis),
std::move (outer_attrs), locus));
}
// Parses function or method qualifiers (i.e. const, unsafe, and extern).
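/* e.g. the `const unsafe extern "C"` prefix of a function - checked in the
 * order const/async, then unsafe, then extern with an optional ABI string */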
template <typename ManagedTokenSource>
AST::FunctionQualifiers
Parser<ManagedTokenSource>::parse_function_qualifiers ()
{
AsyncConstStatus const_status = NONE;
bool has_unsafe = false;
bool has_extern = false;
std::string abi;
// Check in order of const, unsafe, then extern
const_TokenPtr t = lexer.peek_token ();
Location locus = t->get_locus ();
switch (t->get_id ())
{
case CONST:
lexer.skip_token ();
const_status = CONST_FN;
break;
case ASYNC:
lexer.skip_token ();
const_status = ASYNC_FN;
break;
default:
// const status is still none
break;
}
if (lexer.peek_token ()->get_id () == UNSAFE)
{
lexer.skip_token ();
has_unsafe = true;
}
if (lexer.peek_token ()->get_id () == EXTERN_TOK)
{
lexer.skip_token ();
has_extern = true;
// detect optional abi name
const_TokenPtr next_tok = lexer.peek_token ();
if (next_tok->get_id () == STRING_LITERAL)
{
lexer.skip_token ();
abi = next_tok->get_str ();
}
}
return AST::FunctionQualifiers (locus, const_status, has_unsafe, has_extern,
std::move (abi));
}
// Parses generic (lifetime or type) params inside angle brackets (optional).
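// e.g. `<'a, T: Clone, const N: usize>` - returns an empty vector if absent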
template <typename ManagedTokenSource>
std::vector<std::unique_ptr<AST::GenericParam>>
Parser<ManagedTokenSource>::parse_generic_params_in_angles ()
{
if (lexer.peek_token ()->get_id () != LEFT_ANGLE)
{
// seems to be no generic params, so exit with empty vector
return std::vector<std::unique_ptr<AST::GenericParam>> ();
}
lexer.skip_token ();
// DEBUG:
rust_debug ("skipped left angle in generic param");
std::vector<std::unique_ptr<AST::GenericParam>> generic_params
= parse_generic_params (is_right_angle_tok);
// DEBUG:
rust_debug ("finished parsing actual generic params (i.e. inside angles)");
if (!skip_generics_right_angle ())
{
// DEBUG
rust_debug ("failed to skip generics right angle - returning empty "
"generic params");
return std::vector<std::unique_ptr<AST::GenericParam>> ();
}
return generic_params;
}
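/* Parses a single generic parameter - a lifetime, type, or const generic
 * parameter - not including any trailing comma. */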
template <typename ManagedTokenSource>
template <typename EndTokenPred>
std::unique_ptr<AST::GenericParam>
Parser<ManagedTokenSource>::parse_generic_param (EndTokenPred is_end_token)
{
auto token = lexer.peek_token ();
auto outer_attrs = parse_outer_attribute ();
std::unique_ptr<AST::GenericParam> param;
switch (token->get_id ())
{
case LIFETIME: {
auto lifetime = parse_lifetime ();
if (lifetime.is_error ())
{
rust_error_at (
token->get_locus (),
"failed to parse lifetime in generic parameter list");
return nullptr;
}
std::vector<AST::Lifetime> lifetime_bounds;
if (lexer.peek_token ()->get_id () == COLON)
{
lexer.skip_token ();
// parse required bounds
lifetime_bounds
= parse_lifetime_bounds ([is_end_token] (TokenId id) {
return is_end_token (id) || id == COMMA;
});
}
param = std::unique_ptr<AST::LifetimeParam> (new AST::LifetimeParam (
std::move (lifetime), std::move (lifetime_bounds),
std::move (outer_attrs), token->get_locus ()));
break;
}
case IDENTIFIER: {
auto type_ident = token->get_str ();
lexer.skip_token ();
std::vector<std::unique_ptr<AST::TypeParamBound>> type_param_bounds;
if (lexer.peek_token ()->get_id () == COLON)
{
lexer.skip_token ();
// parse optional type param bounds
type_param_bounds = parse_type_param_bounds ();
}
std::unique_ptr<AST::Type> type = nullptr;
if (lexer.peek_token ()->get_id () == EQUAL)
{
lexer.skip_token ();
// parse required type
type = parse_type ();
if (!type)
{
rust_error_at (
lexer.peek_token ()->get_locus (),
"failed to parse type in type param in generic params");
return nullptr;
}
}
param = std::unique_ptr<AST::TypeParam> (
new AST::TypeParam (std::move (type_ident), token->get_locus (),
std::move (type_param_bounds), std::move (type),
std::move (outer_attrs)));
break;
}
case CONST: {
lexer.skip_token ();
auto name_token = expect_token (IDENTIFIER);
if (!name_token || !expect_token (COLON))
return nullptr;
auto type = parse_type ();
if (!type)
return nullptr;
// optional default value
auto default_expr = AST::GenericArg::create_error ();
if (lexer.peek_token ()->get_id () == EQUAL)
{
lexer.skip_token ();
auto tok = lexer.peek_token ();
default_expr = parse_generic_arg ();
if (default_expr.is_error ())
rust_error_at (tok->get_locus (),
"invalid token for start of default value for "
"const generic parameter: expected %, "
"% or %, got %qs",
token_id_to_str (tok->get_id ()));
// At this point, we *know* that we are parsing a const
// expression
if (default_expr.get_kind () == AST::GenericArg::Kind::Either)
default_expr = default_expr.disambiguate_to_const ();
}
param = std::unique_ptr<AST::ConstGenericParam> (
new AST::ConstGenericParam (name_token->get_str (), std::move (type),
default_expr, std::move (outer_attrs),
token->get_locus ()));
break;
}
default:
// FIXME: Can we clean this last call with a method call?
rust_error_at (token->get_locus (),
"unexpected token when parsing generic parameters: %qs",
token->get_str ().c_str ());
return nullptr;
}
return param;
}
/* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost
* always parse_generic_params_in_angles is what is wanted. */
template <typename ManagedTokenSource>
template <typename EndTokenPred>
std::vector<std::unique_ptr<AST::GenericParam>>
Parser<ManagedTokenSource>::parse_generic_params (EndTokenPred is_end_token)
{
std::vector<std::unique_ptr<AST::GenericParam>> generic_params;
/* can't parse lifetime and type params separately due to lookahead issues
* thus, parse them all here */
/* HACK: used to retain attribute data if a lifetime param is tentatively
* parsed but it turns out to be type param */
AST::Attribute parsed_outer_attr = AST::Attribute::create_empty ();
// Did we parse a generic type param yet
auto type_seen = false;
// Did the user write a lifetime parameter after a type one
auto order_error = false;
// parse lifetime params
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
auto param = parse_generic_param (is_end_token);
if (param)
{
// TODO: Handle `Const` here as well if necessary
if (param->get_kind () == AST::GenericParam::Kind::Type)
type_seen = true;
else if (param->get_kind () == AST::GenericParam::Kind::Lifetime
&& type_seen)
order_error = true;
generic_params.emplace_back (std::move (param));
maybe_skip_token (COMMA);
}
}
// FIXME: Add reordering hint
if (order_error)
rust_error_at (generic_params.front ()->get_locus (),
"invalid order for generic parameters: lifetimes should "
"always come before types");
generic_params.shrink_to_fit ();
return generic_params;
}
/* Parses lifetime generic parameters (pointers). Will also consume any
* trailing comma. No extra checks for end token. */
template <typename ManagedTokenSource>
std::vector<std::unique_ptr<AST::LifetimeParam>>
Parser<ManagedTokenSource>::parse_lifetime_params ()
{
std::vector<std::unique_ptr<AST::LifetimeParam>> lifetime_params;
while (lexer.peek_token ()->get_id () != END_OF_FILE)
{
AST::LifetimeParam lifetime_param = parse_lifetime_param ();
if (lifetime_param.is_error ())
{
// can't treat as error as only way to get out with trailing comma
break;
}
lifetime_params.push_back (std::unique_ptr<AST::LifetimeParam> (
new AST::LifetimeParam (std::move (lifetime_param))));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
lifetime_params.shrink_to_fit ();
return lifetime_params;
}
/* Parses lifetime generic parameters (pointers). Will also consume any
* trailing comma. Has extra is_end_token predicate checking. */
template <typename ManagedTokenSource>
template <typename EndTokenPred>
std::vector<std::unique_ptr<AST::LifetimeParam>>
Parser<ManagedTokenSource>::parse_lifetime_params (EndTokenPred is_end_token)
{
std::vector<std::unique_ptr<AST::LifetimeParam>> lifetime_params;
// if end_token is not specified, it defaults to EOF, so should work fine
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
AST::LifetimeParam lifetime_param = parse_lifetime_param ();
if (lifetime_param.is_error ())
{
/* TODO: is it worth throwing away all lifetime params just because
* one failed? */
Error error (lexer.peek_token ()->get_locus (),
"failed to parse lifetime param in lifetime params");
add_error (std::move (error));
return {};
}
lifetime_params.push_back (std::unique_ptr<AST::LifetimeParam> (
new AST::LifetimeParam (std::move (lifetime_param))));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
lifetime_params.shrink_to_fit ();
return lifetime_params;
}
/* Parses lifetime generic parameters (objects). Will also consume any
* trailing comma. No extra checks for end token.
* TODO: is this best solution? implements most of the same algorithm. */
template <typename ManagedTokenSource>
std::vector<AST::LifetimeParam>
Parser<ManagedTokenSource>::parse_lifetime_params_objs ()
{
std::vector<AST::LifetimeParam> lifetime_params;
// bad control structure as end token cannot be guaranteed
while (true)
{
AST::LifetimeParam lifetime_param = parse_lifetime_param ();
if (lifetime_param.is_error ())
{
// not an error as only way to exit if trailing comma
break;
}
lifetime_params.push_back (std::move (lifetime_param));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
lifetime_params.shrink_to_fit ();
return lifetime_params;
}
/* Parses lifetime generic parameters (objects). Will also consume any
* trailing comma. Has extra is_end_token predicate checking.
* TODO: is this best solution? implements most of the same algorithm. */
template <typename ManagedTokenSource>
template <typename EndTokenPred>
std::vector<AST::LifetimeParam>
Parser<ManagedTokenSource>::parse_lifetime_params_objs (
  EndTokenPred is_end_token)
{
std::vector<AST::LifetimeParam> lifetime_params;
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
AST::LifetimeParam lifetime_param = parse_lifetime_param ();
if (lifetime_param.is_error ())
{
/* TODO: is it worth throwing away all lifetime params just because
* one failed? */
Error error (lexer.peek_token ()->get_locus (),
"failed to parse lifetime param in lifetime params");
add_error (std::move (error));
return {};
}
lifetime_params.push_back (std::move (lifetime_param));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
lifetime_params.shrink_to_fit ();
return lifetime_params;
}
/* Parses a sequence of a certain grammar rule in object form (not pointer or
* smart pointer), delimited by commas and ending when 'is_end_token' is
* satisfied (templated). Will also consume any trailing comma.
* FIXME: this cannot be used due to member function pointer problems (i.e.
* parsing_function cannot be specified properly) */
template <typename ManagedTokenSource>
template <typename ParseFunction, typename EndTokenPred>
auto
Parser<ManagedTokenSource>::parse_non_ptr_sequence (
  ParseFunction parsing_function, EndTokenPred is_end_token,
  std::string error_msg) -> std::vector<decltype (parsing_function ())>
{
std::vector<decltype (parsing_function ())> params;
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
auto param = parsing_function ();
if (param.is_error ())
{
// TODO: is it worth throwing away all params just because one
// failed?
Error error (lexer.peek_token ()->get_locus (),
std::move (error_msg));
add_error (std::move (error));
return {};
}
params.push_back (std::move (param));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
params.shrink_to_fit ();
return params;
}
/* Parses a single lifetime generic parameter (not including comma). */
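// e.g. `'a` or `'a: 'b + 'static`, optionally preceded by an outer attribute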
template <typename ManagedTokenSource>
AST::LifetimeParam
Parser<ManagedTokenSource>::parse_lifetime_param ()
{
// parse outer attribute, which is optional and may not exist
AST::Attribute outer_attr = parse_outer_attribute ();
// save lifetime token - required
const_TokenPtr lifetime_tok = lexer.peek_token ();
if (lifetime_tok->get_id () != LIFETIME)
{
// if lifetime is missing, must not be a lifetime param, so return null
return AST::LifetimeParam::create_error ();
}
lexer.skip_token ();
/* TODO: does this always create a named lifetime? or can a different type
* be made? */
AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (),
lifetime_tok->get_locus ());
// parse lifetime bounds, if it exists
std::vector<AST::Lifetime> lifetime_bounds;
if (lexer.peek_token ()->get_id () == COLON)
{
// parse lifetime bounds
lifetime_bounds = parse_lifetime_bounds ();
// TODO: have end token passed in?
}
return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds),
std::move (outer_attr),
lifetime_tok->get_locus ());
}
// Parses type generic parameters. Will also consume any trailing comma.
template <typename ManagedTokenSource>
std::vector<std::unique_ptr<AST::TypeParam>>
Parser<ManagedTokenSource>::parse_type_params ()
{
std::vector<std::unique_ptr<AST::TypeParam>> type_params;
// infinite loop with break on failure as no info on ending token
while (true)
{
std::unique_ptr<AST::TypeParam> type_param = parse_type_param ();
if (type_param == nullptr)
{
// break if fails to parse
break;
}
type_params.push_back (std::move (type_param));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
type_params.shrink_to_fit ();
return type_params;
}
// Parses type generic parameters. Will also consume any trailing comma.
template <typename ManagedTokenSource>
template <typename EndTokenPred>
std::vector<std::unique_ptr<AST::TypeParam>>
Parser<ManagedTokenSource>::parse_type_params (EndTokenPred is_end_token)
{
std::vector<std::unique_ptr<AST::TypeParam>> type_params;
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
std::unique_ptr<AST::TypeParam> type_param = parse_type_param ();
if (type_param == nullptr)
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse type param in type params");
add_error (std::move (error));
return {};
}
type_params.push_back (std::move (type_param));
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
type_params.shrink_to_fit ();
return type_params;
/* TODO: this shares most code with parse_lifetime_params - good place to
* use template (i.e. parse_non_ptr_sequence if doable) */
}
/* Parses a single type (generic) parameter, not including commas. May change
* to return value. */
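// e.g. `T`, `T: Bound1 + Bound2` or `T: Bound = DefaultType`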
template <typename ManagedTokenSource>
std::unique_ptr<AST::TypeParam>
Parser<ManagedTokenSource>::parse_type_param ()
{
// parse outer attribute, which is optional and may not exist
AST::Attribute outer_attr = parse_outer_attribute ();
const_TokenPtr identifier_tok = lexer.peek_token ();
if (identifier_tok->get_id () != IDENTIFIER)
{
// return null as type param can't exist without this required
// identifier
return nullptr;
}
// TODO: create identifier from identifier token
Identifier ident = identifier_tok->get_str ();
lexer.skip_token ();
// parse type param bounds (if they exist)
std::vector<std::unique_ptr<AST::TypeParamBound>> type_param_bounds;
if (lexer.peek_token ()->get_id () == COLON)
{
lexer.skip_token ();
// parse type param bounds, which may or may not exist
type_param_bounds = parse_type_param_bounds ();
}
// parse type (if it exists)
std::unique_ptr<AST::Type> type = nullptr;
if (lexer.peek_token ()->get_id () == EQUAL)
{
lexer.skip_token ();
// parse type (now required)
type = parse_type ();
if (type == nullptr)
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse type in type param");
add_error (std::move (error));
return nullptr;
}
}
return std::unique_ptr<AST::TypeParam> (
new AST::TypeParam (std::move (ident), identifier_tok->get_locus (),
std::move (type_param_bounds), std::move (type),
std::move (outer_attr)));
}
/* Parses regular (i.e. non-generic) parameters in functions or methods. Also
* has end token handling. */
template <typename ManagedTokenSource>
template <typename EndTokenPred>
std::vector<AST::FunctionParam>
Parser<ManagedTokenSource>::parse_function_params (EndTokenPred is_end_token)
{
std::vector<AST::FunctionParam> params;
if (is_end_token (lexer.peek_token ()->get_id ()))
return params;
AST::FunctionParam initial_param = parse_function_param ();
// Return empty parameter list if no parameter there
if (initial_param.is_error ())
{
// TODO: is this an error?
return params;
}
params.push_back (std::move (initial_param));
// maybe think of a better control structure here - do-while with an initial
// error state? basically, loop through parameter list until can't find any
// more params
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () == COMMA)
{
// skip comma if applies
lexer.skip_token ();
// TODO: strictly speaking, shouldn't there be no trailing comma?
if (is_end_token (lexer.peek_token ()->get_id ()))
break;
// now, as right paren would break, function param is required
AST::FunctionParam param = parse_function_param ();
if (param.is_error ())
{
Error error (lexer.peek_token ()->get_locus (),
"failed to parse function param (in function params)");
add_error (std::move (error));
// skip somewhere?
return std::vector<AST::FunctionParam> ();
}
params.push_back (std::move (param));
t = lexer.peek_token ();
}
params.shrink_to_fit ();
return params;
}
/* Parses a single regular (i.e. non-generic) parameter in a function or
* method, i.e. the "name: type" bit. Also handles it not existing. */
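// e.g. `x: i32`, `mut x: i32` or `(a, b): (i32, u8)` - i.e. "pattern: type"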
template <typename ManagedTokenSource>
AST::FunctionParam
Parser<ManagedTokenSource>::parse_function_param ()
{
// parse outer attributes if they exist
AST::AttrVec outer_attrs = parse_outer_attributes ();
// TODO: should saved location be at start of outer attributes or pattern?
Location locus = lexer.peek_token ()->get_locus ();
std::unique_ptr<AST::Pattern> param_pattern = parse_pattern ();
// create error function param if it doesn't exist
if (param_pattern == nullptr)
{
// skip after something
return AST::FunctionParam::create_error ();
}
if (!skip_token (COLON))
{
// skip after something
return AST::FunctionParam::create_error ();
}
std::unique_ptr<AST::Type> param_type = parse_type ();
if (param_type == nullptr)
{
// skip?
return AST::FunctionParam::create_error ();
}
return AST::FunctionParam (std::move (param_pattern), std::move (param_type),
std::move (outer_attrs), locus);
}
/* Parses a function or method return type syntactical construction. Also
* handles a function return type not existing. */
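// i.e. the `-> Type` part of a signature; returns null if not present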
template <typename ManagedTokenSource>
std::unique_ptr<AST::Type>
Parser<ManagedTokenSource>::parse_function_return_type ()
{
if (lexer.peek_token ()->get_id () != RETURN_TYPE)
return nullptr;
// skip return type, as it now obviously exists
lexer.skip_token ();
std::unique_ptr<AST::Type> type = parse_type ();
return type;
}
/* Parses a "where clause" (in a function, struct, method, etc.). Also handles
* a where clause not existing, in which it will return
* WhereClause::create_empty(), which can be checked via
* WhereClause::is_empty(). */
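// e.g. `where T: Clone + Default, 'a: 'b`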
template <typename ManagedTokenSource>
AST::WhereClause
Parser<ManagedTokenSource>::parse_where_clause ()
{
const_TokenPtr where_tok = lexer.peek_token ();
if (where_tok->get_id () != WHERE)
{
// where clause doesn't exist, so create empty one
return AST::WhereClause::create_empty ();
}
lexer.skip_token ();
/* parse where clause items - this is not a separate rule in the reference
* so won't be here */
std::vector<std::unique_ptr<AST::WhereClauseItem>> where_clause_items;
/* HACK: where clauses end with a right curly or semicolon or equals in all
* uses currently */
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON
&& t->get_id () != EQUAL)
{
std::unique_ptr<AST::WhereClauseItem> where_clause_item
= parse_where_clause_item ();
if (where_clause_item == nullptr)
{
Error error (t->get_locus (), "failed to parse where clause item");
add_error (std::move (error));
return AST::WhereClause::create_empty ();
}
where_clause_items.push_back (std::move (where_clause_item));
// also skip comma if it exists
if (lexer.peek_token ()->get_id () != COMMA)
break;
lexer.skip_token ();
t = lexer.peek_token ();
}
where_clause_items.shrink_to_fit ();
return AST::WhereClause (std::move (where_clause_items));
}
/* Parses a where clause item (lifetime or type bound). Does not parse any
* commas. */
template <typename ManagedTokenSource>
std::unique_ptr<AST::WhereClauseItem>
Parser<ManagedTokenSource>::parse_where_clause_item ()
{
// cheap way of determining lifetime or type bound - test for a lifetime token
const_TokenPtr t = lexer.peek_token ();
if (t->get_id () == LIFETIME)
return parse_lifetime_where_clause_item ();
else
return parse_type_bound_where_clause_item ();
}
// Parses a lifetime where clause item.
template <typename ManagedTokenSource>
std::unique_ptr<AST::LifetimeWhereClauseItem>
Parser