// Copyright (C) 2020-2026 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before
* Parser was a template). Separated from rust-parse.h for readability. */
/* DO NOT INCLUDE ANYWHERE - this is automatically included
* by rust-parse-impl-*.cc
* This is also the reason why there are no include guards. */
#include "expected.h"
#include "rust-ast.h"
#include "rust-common.h"
#include "rust-expr.h"
#include "rust-item.h"
#include "rust-common.h"
#include "rust-parse.h"
#include "rust-token.h"
#define INCLUDE_ALGORITHM
#include "rust-diagnostics.h"
#include "rust-dir-owner.h"
#include "rust-keyword-values.h"
#include "rust-edition.h"
#include "rust-parse-error.h"
#include "optional.h"
namespace Rust {
/* Consumes the '>' that closes a list of generic arguments.
 *
 * The closing '>' may have been lexed as the first character of a longer
 * operator ('>>', '>=' or '>>=').  In that case the current token is split
 * through lexer.split_current_token (left, right) so that a lone '>' comes
 * first, and only that '>' is skipped; the remainder stays in the stream.
 *
 * FIXME: splitting rewrites the token stream in place.  Whether inserting
 * tokens like this is always safe (e.g. if the token was already peeked or
 * skipped elsewhere) still needs a code audit.
 *
 * Returns true if a '>' was consumed.  Otherwise records an error and
 * returns false without consuming anything. */
template
bool
Parser::skip_generics_right_angle ()
{
  const_TokenPtr closing = lexer.peek_token ();

  switch (closing->get_id ())
    {
    case RIGHT_ANGLE:
      // already a lone '>' - nothing to split
      break;
    case RIGHT_SHIFT:
      // '>>' becomes '>' followed by '>'
      lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE);
      break;
    case GREATER_OR_EQUAL:
      // '>=' becomes '>' followed by '='
      lexer.split_current_token (RIGHT_ANGLE, EQUAL);
      break;
    case RIGHT_SHIFT_EQ:
      // '>>=' becomes '>' followed by '>='
      lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL);
      break;
    default:
      add_error (Error (closing->get_locus (),
                        "expected %<>%> at end of generic argument - found %qs",
                        closing->get_token_description ()));
      return false;
    }

  // the current token is now exactly '>' - consume it
  lexer.skip_token ();
  return true;
}
/* Returns the left binding power associated with TOKEN when it appears in
 * an infix or postfix position.
 *
 * A purely token-based lookup is not really adequate for Rust's grammar:
 * e.g. method calls and field accesses share the '.' token but have
 * different binding powers.  The DOT case therefore relies on lookahead.
 * HACK: this assumes the function is called with the result of
 * peek_token (), so that peek_token (1) is the token after the operator.
 *
 * TODO: also distinguish paths from function calls ('::'), and decide
 * whether Pratt parsing should only be used for operator expressions.
 * TODO: handle operator overloading - have a function replace the
 * operator? */
template
int
Parser::left_binding_power (const_TokenPtr token)
{
  switch (token->get_id ())
    {
    case SCOPE_RESOLUTION:
      rust_debug (
        "possible error - looked up LBP of scope resolution operator. should "
        "be handled elsewhere.");
      return LBP_PATH;

    case DOT:
      {
        /* Lookahead HACK: an identifier that is not followed by a
         * parenthesised argument list is a field reference; everything else
         * is treated as a method call.  */
        const bool looks_like_field
          = lexer.peek_token (1)->get_id () == IDENTIFIER
            && lexer.peek_token (2)->get_id () != LEFT_PAREN;
        return looks_like_field ? LBP_FIELD_EXPR : LBP_METHOD_CALL;
      }

    // call, index and other postfix forms
    case LEFT_PAREN:
      return LBP_FUNCTION_CALL;
    case LEFT_SQUARE:
      return LBP_ARRAY_REF;
    case QUESTION_MARK: // error propagation expression
      return LBP_QUESTION_MARK;
    case AS:
      return LBP_AS;

    // arithmetic
    case ASTERISK:
      return LBP_MUL;
    case DIV:
      return LBP_DIV;
    case PERCENT:
      return LBP_MOD;
    case PLUS:
      return LBP_PLUS;
    case MINUS:
      return LBP_MINUS;

    // shifts
    case LEFT_SHIFT:
      return LBP_L_SHIFT;
    case RIGHT_SHIFT:
      return LBP_R_SHIFT;

    // binary bitwise operators: &, ^ and |
    case AMP:
      return LBP_AMP;
    case CARET:
      return LBP_CARET;
    case PIPE:
      return LBP_PIPE;

    // comparisons
    case EQUAL_EQUAL:
      return LBP_EQUAL;
    case NOT_EQUAL:
      return LBP_NOT_EQUAL;
    case RIGHT_ANGLE:
      return LBP_GREATER_THAN;
    case GREATER_OR_EQUAL:
      return LBP_GREATER_EQUAL;
    case LEFT_ANGLE:
      return LBP_SMALLER_THAN;
    case LESS_OR_EQUAL:
      return LBP_SMALLER_EQUAL;

    // lazy boolean operators
    case LOGICAL_AND:
      return LBP_LOGICAL_AND;
    case OR:
      return LBP_LOGICAL_OR;

    // ranges
    case DOT_DOT:
      return LBP_DOT_DOT;
    case DOT_DOT_EQ:
      return LBP_DOT_DOT_EQ;

    // plain and compound assignment
    case EQUAL:
      return LBP_ASSIG;
    case PLUS_EQ:
      return LBP_PLUS_ASSIG;
    case MINUS_EQ:
      return LBP_MINUS_ASSIG;
    case ASTERISK_EQ:
      return LBP_MULT_ASSIG;
    case DIV_EQ:
      return LBP_DIV_ASSIG;
    case PERCENT_EQ:
      return LBP_MOD_ASSIG;
    case AMP_EQ:
      return LBP_AMP_ASSIG;
    case PIPE_EQ:
      return LBP_PIPE_ASSIG;
    case CARET_EQ:
      return LBP_CARET_ASSIG;
    case LEFT_SHIFT_EQ:
      return LBP_L_SHIFT_ASSIG;
    case RIGHT_SHIFT_EQ:
      return LBP_R_SHIFT_ASSIG;

    /* HACK: a float literal here comes from the lexer misidentifying a dot
     * followed by an integer (tuple index) as a float, so it is given the
     * field expression precedence.
     * TODO: is this needed anymore? lexer shouldn't do that anymore */
    case FLOAT_LITERAL:
      return LBP_FIELD_EXPR;

    // anything that can't appear in an infix position gets lowest priority
    default:
      return LBP_LOWEST;
    }
}
// Reports whether the next token to be parsed is the end-of-file marker.
// Only peeks: no token is consumed.
template
bool
Parser::done_end_of_file ()
{
  const_TokenPtr upcoming = lexer.peek_token ();
  return upcoming->get_id () == END_OF_FILE;
}
// Parses items one after another until the end of file is reached.
// On the first item that fails to parse, returns a "malformed" error that
// carries the items parsed so far; otherwise returns the full item list.
template
tl::expected>, Parse::Error::Items>
Parser::parse_items ()
{
  std::vector> items;

  while (lexer.peek_token ()->get_id () != END_OF_FILE)
    {
      auto parsed = parse_item (false);
      if (!parsed)
        return Parse::Error::Items::make_malformed (std::move (items));

      items.push_back (std::move (parsed.value ()));
    }

    // GCC 5->7 bug: the returned lvalue is not treated as an rvalue when
    // selecting the overload, so the move has to be spelled out there.
#if __GNUC__ <= 7
  return std::move (items);
#else
  return items;
#endif
}
// Entry point: parses a whole crate (compilation unit).
// Reads the inner attributes, then every item, emits all errors collected in
// error_table and finally builds the AST::Crate.  If item parsing reports an
// error the crate is built with an empty item list.
template
std::unique_ptr
Parser::parse_crate ()
{
  // crate-level inner attributes come first
  AST::AttrVec inner_attrs = parse_inner_attributes ();

  // then the items; fall back to no items at all on failure
  auto items
    = parse_items ().value_or (std::vector>{});

  // report everything that was recorded while parsing
  for (const auto &pending : error_table)
    {
      pending.emit ();
    }

  return std::unique_ptr (
    new AST::Crate (std::move (items), std::move (inner_attrs)));
}
// Consumes the current token when it is an identifier or a keyword and wraps
// it in an AST::Token.  Any other token is left in place, an error is
// recorded and a MALFORMED node error is returned.
template
tl::expected, Parse::Error::Node>
Parser::parse_identifier_or_keyword_token ()
{
  const_TokenPtr t = lexer.peek_token ();

  const bool acceptable
    = t->get_id () == IDENTIFIER || token_id_is_keyword (t->get_id ());
  if (!acceptable)
    {
      add_error (Error (t->get_locus (), "expected keyword or identifier"));
      return tl::unexpected (Parse::Error::Node::MALFORMED);
    }

  lexer.skip_token ();
  return std::unique_ptr (new AST::Token (std::move (t)));
}
// Checks whether the upcoming tokens form the start of a macro rules
// definition: T spells the weak keyword `macro_rules`, the next token is `!`
// and the one after that is a permitted macro name (an identifier or `try`).
template
bool
Parser::is_macro_rules_def (const_TokenPtr t)
{
  const auto name_id = lexer.peek_token (2)->get_id ();

  if (t->get_str () != Values::WeakKeywords::MACRO_RULES)
    return false;

  if (lexer.peek_token (1)->get_id () != EXCLAM)
    return false;

  return name_id == IDENTIFIER || name_id == TRY;
}
/* Parses a single item, including its leading outer attributes.
 *
 * CALLED_FROM_STATEMENT only affects the wording of the "unrecognised token"
 * diagnostic ("statement" instead of "item").
 *
 * Returns the parsed item on success.  On failure returns a
 * Parse::Error::Item: "end of file" when no token is left, "malformed"
 * whenever a sub-parser fails or the first token cannot start an item. */
template
tl::expected, Parse::Error::Item>
Parser::parse_item (bool called_from_statement)
{
  // TODO: GCC 5 does not handle implicit return type correctly so we're forced
  // to specify it almost every time until the baseline GCC gets bumped.
  // Since this type is quite long and the code is dense we use an alias.
  //
  // When support for GCC 5 stops: remove this alias as well as the explicit
  // ctor calls.
  using RType = tl::expected, Parse::Error::Item>;

  // parse outer attributes for item
  AST::AttrVec outer_attrs = parse_outer_attributes ();

  const_TokenPtr t = lexer.peek_token ();
  switch (t->get_id ())
    {
    case END_OF_FILE:
      // not necessarily an error, unless we just read outer
      // attributes which need to be attached to something
      if (!outer_attrs.empty ())
        {
          Rust::AST::Attribute attr = outer_attrs.back ();
          Error error (attr.get_locus (),
                       "expected item after outer attribute or doc comment");
          add_error (std::move (error));
        }
      return Parse::Error::Item::make_end_of_file ();

    case ASYNC:
    case PUB:
    case MOD:
    case EXTERN_KW:
    case USE:
    case FN_KW:
    case TYPE:
    case STRUCT_KW:
    case ENUM_KW:
    case CONST:
    case STATIC_KW:
    case AUTO:
    case TRAIT:
    case IMPL:
    case MACRO:
    /* `union` is context-dependent and therefore not a keyword token; it is
     * handled in the IDENTIFIER case below. */
    case UNSAFE: // maybe - unsafe traits are a thing
      // any of these (should be all possible VisItem prefixes) starts a
      // VisItem
      {
        auto vis_item = parse_vis_item (std::move (outer_attrs));
        if (!vis_item)
          return Parse::Error::Item::make_malformed ();
        return RType{std::move (vis_item)};
      }

    case SUPER:
    case SELF:
    case CRATE:
    case DOLLAR_SIGN:
      // almost certainly macro invocation semi
      {
        auto macro_invoc_semi
          = parse_macro_invocation_semi (std::move (outer_attrs));
        if (!macro_invoc_semi)
          return Parse::Error::Item::make_malformed ();
        return RType{std::move (macro_invoc_semi)};
      }

    // weak keywords (`union`, `default`, `macro_rules`) and macro invocations
    // all start with a plain identifier
    case IDENTIFIER:
      // TODO: ensure std::string and literal comparison works
      if (t->get_str () == Values::WeakKeywords::UNION
          && lexer.peek_token (1)->get_id () == IDENTIFIER)
        {
          auto vis_item = parse_vis_item (std::move (outer_attrs));
          if (!vis_item)
            return Parse::Error::Item::make_malformed ();
          return RType{std::move (vis_item)};
          // or should this go straight to parsing union?
        }
      else if (t->get_str () == Values::WeakKeywords::DEFAULT
               && lexer.peek_token (1)->get_id () != EXCLAM)
        {
          // parse normal functions with `default` qualifier
          // they will be rejected in ASTValidation pass
          auto vis_item = parse_vis_item (std::move (outer_attrs));
          // a failed parse must be reported as an error like in every other
          // branch, not wrapped as a "successful" null item
          if (!vis_item)
            return Parse::Error::Item::make_malformed ();
          return RType{std::move (vis_item)};
        }
      else if (is_macro_rules_def (t))
        {
          // macro_rules! macro item
          auto macro_rule_def = parse_macro_rules_def (std::move (outer_attrs));
          if (!macro_rule_def)
            return Parse::Error::Item::make_malformed ();
          return RType{std::move (macro_rule_def)};
        }
      else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION
               || lexer.peek_token (1)->get_id () == EXCLAM)
        {
          /* path (probably) or macro invocation, so probably a macro invocation
           * semi */
          auto macro_invocation_semi
            = parse_macro_invocation_semi (std::move (outer_attrs));
          if (!macro_invocation_semi)
            return Parse::Error::Item::make_malformed ();
          return RType{std::move (macro_invocation_semi)};
        }
      // a plain identifier that starts none of the above is unrecognised
      gcc_fallthrough ();
    default:
      // otherwise unrecognised
      add_error (Error (t->get_locus (),
                        "unrecognised token %qs for start of %s",
                        t->get_token_description (),
                        called_from_statement ? "statement" : "item"));
      // skip somewhere?
      return Parse::Error::Item::make_malformed ();
    }
}
// Parses a VisItem (item that can have non-default visibility).
// First parses the optional visibility, then dispatches on the next token
// (with up to two tokens of extra lookahead) to the parser for the matching
// kind of item.  Returns nullptr when the visibility cannot be parsed, when
// the lookahead finds an unexpected token (an error is recorded and tokens
// are skipped), or when the current token starts no known VisItem (no error
// is recorded in that case).
template
std::unique_ptr
Parser::parse_vis_item (AST::AttrVec outer_attrs)
{
// parse visibility, which may or may not exist
auto vis_res = parse_visibility ();
if (!vis_res)
return nullptr;
auto vis = vis_res.value ();
// select VisItem to create depending on keyword
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case MOD:
return parse_module (std::move (vis), std::move (outer_attrs));
case EXTERN_KW:
// lookahead to resolve syntactical production
t = lexer.peek_token (1);
switch (t->get_id ())
{
case CRATE:
return parse_extern_crate (std::move (vis), std::move (outer_attrs));
case FN_KW: // extern function
return parse_function (std::move (vis), std::move (outer_attrs));
case LEFT_CURLY: // extern block
return parse_extern_block (std::move (vis), std::move (outer_attrs));
case STRING_LITERAL: // for specifying extern ABI
// could be extern block or extern function, so more lookahead
t = lexer.peek_token (2);
switch (t->get_id ())
{
case FN_KW:
return parse_function (std::move (vis), std::move (outer_attrs));
case LEFT_CURLY:
return parse_extern_block (std::move (vis),
std::move (outer_attrs));
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of extern production",
t->get_token_description ()));
lexer.skip_token (2); // TODO: is this right thing to do?
return nullptr;
}
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of extern production",
t->get_token_description ()));
lexer.skip_token (1); // TODO: is this right thing to do?
return nullptr;
}
case USE:
return parse_use_decl (std::move (vis), std::move (outer_attrs));
case FN_KW:
return parse_function (std::move (vis), std::move (outer_attrs));
case TYPE:
return parse_type_alias (std::move (vis), std::move (outer_attrs));
case STRUCT_KW:
return parse_struct (std::move (vis), std::move (outer_attrs));
case ENUM_KW:
return parse_enum (std::move (vis), std::move (outer_attrs));
// `union` and `default` are weak keywords, so they arrive as identifiers
// and are recognised by their spelling.
case IDENTIFIER:
if (t->get_str () == Values::WeakKeywords::UNION
&& lexer.peek_token (1)->get_id () == IDENTIFIER)
{
return parse_union (std::move (vis), std::move (outer_attrs));
// or should item switch go straight to parsing union?
}
else if (t->get_str () == Values::WeakKeywords::DEFAULT)
{
// parse normal functions with `default` qualifier they will be
// rejected in ASTValidation pass
return parse_function (std::move (vis), std::move (outer_attrs));
}
// any other identifier is not a VisItem: use the post-switch return
break;
case CONST:
// lookahead to resolve syntactical production
t = lexer.peek_token (1);
switch (t->get_id ())
{
case IDENTIFIER:
case UNDERSCORE:
return parse_const_item (std::move (vis), std::move (outer_attrs));
case ASYNC:
return parse_async_item (std::move (vis), std::move (outer_attrs));
case UNSAFE:
case EXTERN_KW:
case FN_KW:
return parse_function (std::move (vis), std::move (outer_attrs));
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of const production",
t->get_token_description ()));
lexer.skip_token (1); // TODO: is this right thing to do?
return nullptr;
}
// for async functions
case ASYNC:
return parse_async_item (std::move (vis), std::move (outer_attrs));
case STATIC_KW:
return parse_static_item (std::move (vis), std::move (outer_attrs));
case AUTO:
case TRAIT:
return parse_trait (std::move (vis), std::move (outer_attrs));
case IMPL:
return parse_impl (std::move (vis), std::move (outer_attrs));
case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls),
// and unsafe modules
// lookahead to resolve syntactical production
t = lexer.peek_token (1);
switch (t->get_id ())
{
case AUTO:
case TRAIT:
return parse_trait (std::move (vis), std::move (outer_attrs));
case EXTERN_KW:
case FN_KW:
return parse_function (std::move (vis), std::move (outer_attrs));
case IMPL:
return parse_impl (std::move (vis), std::move (outer_attrs));
case MOD:
return parse_module (std::move (vis), std::move (outer_attrs));
default:
add_error (
Error (t->get_locus (),
"unexpected token %qs in some sort of unsafe production",
t->get_token_description ()));
lexer.skip_token (1); // TODO: is this right thing to do?
return nullptr;
}
case MACRO:
return parse_decl_macro_def (std::move (vis), std::move (outer_attrs));
default:
// otherwise vis item clearly doesn't exist, which is not an error
// has a catch-all post-switch return to allow other breaks to occur
break;
}
// reached only through a `break` above: no VisItem starts here
return nullptr;
}
/* Parses an item introduced by `async`, optionally preceded by `const`.
 *
 * Records error E0670 plus a hint when the edition is Rust 2015.  The item
 * is handed to parse_function when `async` is followed by `unsafe` or `fn`;
 * otherwise an error is recorded, tokens are skipped and nullptr is
 * returned. */
template
std::unique_ptr
Parser::parse_async_item (AST::Visibility vis,
                          AST::AttrVec outer_attrs)
{
  // a leading `const` shifts the position of the `async` token by one
  auto offset = (lexer.peek_token ()->get_id () == CONST) ? 1 : 0;
  const_TokenPtr t = lexer.peek_token (offset);

  if (get_rust_edition () == Edition::E2015)
    {
      add_error (Error (t->get_locus (), ErrorCode::E0670,
                        "%<async fn%> is not permitted in Rust 2015"));
      add_error (
        Error::Hint (t->get_locus (),
                     "to use %<async fn%>, switch to Rust 2018 or later"));
    }

  // look at the token that follows `async`
  t = lexer.peek_token (offset + 1);

  switch (t->get_id ())
    {
    case UNSAFE:
    case FN_KW:
      return parse_function (std::move (vis), std::move (outer_attrs));

    default:
      add_error (
        Error (t->get_locus (), "expected item, found keyword %<async%>"));

      lexer.skip_token (1);
      return nullptr;
    }
}
/* Parses a `macro_rules!` definition:
 *   macro_rules ! NAME <open-delim> rule (; rule)* ;? <close-delim> ;?
 *
 * At least one rule is required.  The closing delimiter must match the
 * opening one.  A trailing semicolon is expected after non-curly
 * delimiters, but the definition is still returned when it is missing.
 *
 * Returns nullptr (with an error recorded, either here or by the helper
 * that failed) when the definition cannot be parsed. */
template
std::unique_ptr
Parser::parse_macro_rules_def (AST::AttrVec outer_attrs)
{
  // ensure that first token is identifier saying "macro_rules"
  const_TokenPtr t = lexer.peek_token ();
  if (t->get_id () != IDENTIFIER
      || t->get_str () != Values::WeakKeywords::MACRO_RULES)
    {
      Error error (
        t->get_locus (),
        "macro rules definition does not start with %<macro_rules%>");
      add_error (std::move (error));
      // skip after somewhere?
      return nullptr;
    }
  lexer.skip_token ();
  location_t macro_locus = t->get_locus ();

  if (!skip_token (EXCLAM))
    {
      // skip after somewhere?
      return nullptr;
    }

  // parse macro name
  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
  if (ident_tok == nullptr)
    {
      return nullptr;
    }
  Identifier rule_name{ident_tok};

  // DEBUG
  rust_debug ("in macro rules def, about to parse parens.");

  // save delim type to ensure it is reused later
  AST::DelimType delim_type = AST::PARENS;

  // Map tokens to DelimType
  t = lexer.peek_token ();
  switch (t->get_id ())
    {
    case LEFT_PAREN:
      delim_type = AST::PARENS;
      break;
    case LEFT_SQUARE:
      delim_type = AST::SQUARE;
      break;
    case LEFT_CURLY:
      delim_type = AST::CURLY;
      break;
    default:
      add_error (Error (t->get_locus (),
                        "unexpected token %qs - expecting delimiters (for a "
                        "macro rules definition)",
                        t->get_token_description ()));
      return nullptr;
    }
  lexer.skip_token ();

  // parse actual macro rules
  std::vector macro_rules;

  // must be at least one macro rule, so parse it
  AST::MacroRule initial_rule = parse_macro_rule ();
  if (initial_rule.is_error ())
    {
      Error error (lexer.peek_token ()->get_locus (),
                   "required first macro rule in macro rules definition "
                   "could not be parsed");
      add_error (std::move (error));
      // skip after somewhere?
      return nullptr;
    }
  macro_rules.push_back (std::move (initial_rule));

  // DEBUG
  rust_debug ("successfully pushed back initial macro rule");

  t = lexer.peek_token ();
  // parse the remaining rules, each introduced by a semicolon
  while (t->get_id () == SEMICOLON)
    {
      // skip semicolon
      lexer.skip_token ();

      // a semicolon directly before the closing delimiter ends the rules
      if (Parse::Utils::token_id_matches_delims (lexer.peek_token ()->get_id (),
                                                 delim_type))
        {
          // DEBUG
          rust_debug (
            "broke out of parsing macro rules loop due to finding delim");
          break;
        }

      // try to parse next rule
      AST::MacroRule rule = parse_macro_rule ();
      if (rule.is_error ())
        {
          Error error (lexer.peek_token ()->get_locus (),
                       "failed to parse macro rule in macro rules definition");
          add_error (std::move (error));
          return nullptr;
        }
      macro_rules.push_back (std::move (rule));

      // DEBUG
      rust_debug ("successfully pushed back another macro rule");

      t = lexer.peek_token ();
    }

  // parse end delimiters
  t = lexer.peek_token ();
  if (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type))
    {
      // tokens don't match opening delimiters, so produce error
      Error error (t->get_locus (),
                   "unexpected token %qs - expecting closing delimiter %qs "
                   "(for a macro rules definition)",
                   t->get_token_description (),
                   (delim_type == AST::PARENS
                      ? ")"
                      : (delim_type == AST::SQUARE ? "]" : "}")));
      add_error (std::move (error));
      /* return empty macro definition despite possibly parsing mostly valid
       * one - TODO is this a good idea? */
      return nullptr;
    }

  // token matches the opening delimiter, so skip it
  lexer.skip_token ();

  /* Non-curly definitions end with a semicolon.  As this is the very end of
   * the definition, a missing semicolon does not discard it: the definition
   * is returned either way to allow recovery (may change).  */
  if (delim_type != AST::CURLY)
    skip_token (SEMICOLON);

  return std::unique_ptr (
    AST::MacroRulesDefinition::mbe (std::move (rule_name), delim_type,
                                    std::move (macro_rules),
                                    std::move (outer_attrs), macro_locus));
}
/* Parses a declarative macro 2.0 definition.  Two forms are accepted:
 *   macro NAME (matcher) { transcriber }          - a single rule
 *   macro NAME { rule (, rule)* ,? }              - several rules
 *
 * In the single-rule form the matcher must use parentheses and the
 * transcriber must use braces.  Returns nullptr (with an error recorded,
 * either here or by the helper that failed) when the definition cannot be
 * parsed. */
template
std::unique_ptr
Parser::parse_decl_macro_def (AST::Visibility vis,
                              AST::AttrVec outer_attrs)
{
  // ensure that first token is the `macro` keyword
  const_TokenPtr t = lexer.peek_token ();
  if (t->get_id () != MACRO)
    {
      Error error (
        t->get_locus (),
        "declarative macro definition does not start with %<macro%>");
      add_error (std::move (error));
      // skip after somewhere?
      return nullptr;
    }
  lexer.skip_token ();
  location_t macro_locus = t->get_locus ();

  // parse macro name
  const_TokenPtr ident_tok = expect_token (IDENTIFIER);
  if (ident_tok == nullptr)
    {
      return nullptr;
    }
  Identifier rule_name{ident_tok};

  t = lexer.peek_token ();
  if (t->get_id () == LEFT_PAREN)
    {
      // single definition of macro rule
      // e.g. `macro foo($e:expr) {}`

      // parse macro matcher
      location_t locus = lexer.peek_token ()->get_locus ();
      AST::MacroMatcher matcher = parse_macro_matcher ();
      if (matcher.is_error ())
        return nullptr;

      // check delimiter of macro matcher
      if (matcher.get_delim_type () != AST::DelimType::PARENS)
        {
          Error error (locus, "only parenthesis can be used for a macro "
                              "matcher in declarative macro definition");
          add_error (std::move (error));
          return nullptr;
        }

      location_t transcriber_loc = lexer.peek_token ()->get_locus ();
      auto delim_tok_tree = parse_delim_token_tree ();
      if (!delim_tok_tree)
        return nullptr;

      AST::MacroTranscriber transcriber (delim_tok_tree.value (),
                                         transcriber_loc);

      if (transcriber.get_token_tree ().get_delim_type ()
          != AST::DelimType::CURLY)
        {
          Error error (transcriber_loc,
                       "only braces can be used for a macro transcriber "
                       "in declarative macro definition");
          add_error (std::move (error));
          return nullptr;
        }

      std::vector macro_rules;
      macro_rules.emplace_back (std::move (matcher), std::move (transcriber),
                                locus);

      // the rule list is not used afterwards, so hand it over instead of
      // copying it (same as the multi-rule branch below)
      return std::unique_ptr (
        AST::MacroRulesDefinition::decl_macro (std::move (rule_name),
                                               std::move (macro_rules),
                                               std::move (outer_attrs),
                                               macro_locus, vis));
    }
  else if (t->get_id () == LEFT_CURLY)
    {
      // multiple definitions of macro rule separated by comma
      // e.g. `macro foo { () => {}, ($e:expr) => {}, }`

      // parse left curly
      const_TokenPtr left_curly = expect_token (LEFT_CURLY);
      if (left_curly == nullptr)
        {
          return nullptr;
        }

      // parse actual macro rules
      std::vector macro_rules;

      // must be at least one macro rule, so parse it
      AST::MacroRule initial_rule = parse_macro_rule ();
      if (initial_rule.is_error ())
        {
          Error error (
            lexer.peek_token ()->get_locus (),
            "required first macro rule in declarative macro definition "
            "could not be parsed");
          add_error (std::move (error));
          // skip after somewhere?
          return nullptr;
        }
      macro_rules.push_back (std::move (initial_rule));

      t = lexer.peek_token ();
      // parse the remaining rules, each introduced by a comma
      while (t->get_id () == COMMA)
        {
          // skip comma
          lexer.skip_token ();

          // a comma directly before the closing brace ends the rules
          if (Parse::Utils::token_id_matches_delims (
                lexer.peek_token ()->get_id (), AST::CURLY))
            {
              break;
            }

          // try to parse next rule
          AST::MacroRule rule = parse_macro_rule ();
          if (rule.is_error ())
            {
              Error error (
                lexer.peek_token ()->get_locus (),
                "failed to parse macro rule in declarative macro definition");
              add_error (std::move (error));
              return nullptr;
            }
          macro_rules.push_back (std::move (rule));

          t = lexer.peek_token ();
        }

      // parse right curly
      const_TokenPtr right_curly = expect_token (RIGHT_CURLY);
      if (right_curly == nullptr)
        {
          return nullptr;
        }

      return std::unique_ptr (
        AST::MacroRulesDefinition::decl_macro (std::move (rule_name),
                                               std::move (macro_rules),
                                               std::move (outer_attrs),
                                               macro_locus, vis));
    }
  else
    {
      add_error (Error (t->get_locus (),
                        "unexpected token %qs - expecting delimiters "
                        "(for a declarative macro definiton)",
                        t->get_token_description ()));
      return nullptr;
    }
}
/* Parses an optional visibility qualifier.
 *
 *   (nothing)        -> private visibility, no token consumed
 *   pub              -> public
 *   pub ()           -> public; the `()` is left in place (unit type)
 *   pub(crate)       -> crate visibility
 *   pub(self)        -> self visibility
 *   pub(super)       -> super visibility
 *   pub(in PATH)     -> visibility restricted to PATH
 *
 * Any other token after `pub(` records an error, is skipped, and yields a
 * "malformed" error; a missing path after `in` yields "missing path". */
template
tl::expected
Parser::parse_visibility ()
{
  // without `pub` there is nothing to parse
  if (lexer.peek_token ()->get_id () != PUB)
    return AST::Visibility::create_private ();

  auto vis_loc = lexer.peek_token ()->get_locus ();
  lexer.skip_token ();

  // a restriction needs an opening parenthesis that is not immediately
  // closed again
  const bool has_restriction
    = lexer.peek_token ()->get_id () == LEFT_PAREN
      && lexer.peek_token (1)->get_id () != RIGHT_PAREN;
  if (!has_restriction)
    return AST::Visibility::create_public (vis_loc);

  // consume the '('
  lexer.skip_token ();

  const_TokenPtr restriction = lexer.peek_token ();
  auto path_loc = restriction->get_locus ();
  const auto restriction_id = restriction->get_id ();

  if (restriction_id == IN)
    {
      lexer.skip_token ();

      // `in` must be followed by a simple path
      auto path = parse_simple_path ();
      if (!path)
        {
          Error error (lexer.peek_token ()->get_locus (),
                       "missing path in pub(in path) visibility");
          add_error (std::move (error));

          // skip after somewhere?
          return Parse::Error::Visibility::make_missing_path ();
        }

      skip_token (RIGHT_PAREN);

      return AST::Visibility::create_in_path (std::move (path.value ()),
                                              vis_loc);
    }

  if (restriction_id != CRATE && restriction_id != SELF
      && restriction_id != SUPER)
    {
      add_error (Error (restriction->get_locus (),
                        "unexpected token %qs in visibility",
                        restriction->get_token_description ()));

      lexer.skip_token ();
      return Parse::Error::Visibility::make_malformed ();
    }

  // the three keyword restrictions share the same shape: KEYWORD ')'
  lexer.skip_token ();
  skip_token (RIGHT_PAREN);

  if (restriction_id == CRATE)
    return AST::Visibility::create_crate (path_loc, vis_loc);

  if (restriction_id == SELF)
    return AST::Visibility::create_self (path_loc, vis_loc);

  return AST::Visibility::create_super (path_loc, vis_loc);
}
/* Parses a module item, optionally prefixed with `unsafe`:
 *   mod NAME ;          - module whose body lives in another file
 *   mod NAME { ... }    - module with an inline body
 *
 * Returns nullptr when the name is missing, when an item inside an inline
 * body fails to parse, when the closing brace is missing, or when the token
 * after the name is neither `;` nor `{`. */
template
std::unique_ptr
Parser::parse_module (AST::Visibility vis,
                      AST::AttrVec outer_attrs)
{
  location_t locus = lexer.peek_token ()->get_locus ();

  Unsafety safety = Unsafety::Normal;
  if (lexer.peek_token ()->get_id () == UNSAFE)
    {
      safety = Unsafety::Unsafe;
      skip_token (UNSAFE);
    }

  skip_token (MOD);

  const_TokenPtr module_name = expect_token (IDENTIFIER);
  if (module_name == nullptr)
    return nullptr;
  Identifier name{module_name};

  const_TokenPtr t = lexer.peek_token ();

  if (t->get_id () == SEMICOLON)
    {
      lexer.skip_token ();

      // external module: only the declaration is present here
      return std::unique_ptr (
        new AST::Module (std::move (name), std::move (vis),
                         std::move (outer_attrs), locus, safety,
                         lexer.get_filename (), inline_module_stack));
    }

  if (t->get_id () != LEFT_CURLY)
    {
      add_error (
        Error (t->get_locus (),
               "unexpected token %qs in module declaration/definition item",
               t->get_token_description ()));
      lexer.skip_token ();
      return nullptr;
    }

  // inline module: consume the '{' and parse the body
  lexer.skip_token ();

  AST::AttrVec inner_attrs = parse_inner_attributes ();

  std::string default_path = name.as_string ();

  if (inline_module_stack.empty ())
    {
      // outermost inline module: derive the default path from the name of
      // the current file with any leading directories removed
      std::string filename = lexer.get_filename ();
      const auto last_sep = filename.rfind (file_separator);
      const auto base_start
        = (last_sep == std::string::npos) ? 0 : last_sep + 1;
      filename = filename.substr (base_start);

      std::string subdir;
      if (get_file_subdir (filename, subdir))
        default_path = subdir + file_separator + name.as_string ();
    }

  std::string module_path_name
    = extract_module_path (inner_attrs, outer_attrs, default_path);
  // stays alive until this function returns, i.e. while the body is parsed
  InlineModuleStackScope scope (*this, std::move (module_path_name));

  // parse items until the closing brace
  std::vector> items;
  for (const_TokenPtr item_start = lexer.peek_token ();
       item_start->get_id () != RIGHT_CURLY;
       item_start = lexer.peek_token ())
    {
      auto parsed = parse_item (false);
      if (!parsed)
        {
          // reported at the token where the failed item began
          Error error (item_start->get_locus (),
                       "failed to parse item in module");
          add_error (std::move (error));
          return nullptr;
        }

      items.push_back (std::move (parsed.value ()));
    }

  if (!skip_token (RIGHT_CURLY))
    {
      // skip somewhere?
      return nullptr;
    }

  return std::unique_ptr (
    new AST::Module (std::move (name), locus, std::move (items),
                     std::move (vis), safety, std::move (inner_attrs),
                     std::move (outer_attrs))); // module name?
}
/* Parses an extern crate declaration (dependency on an external crate):
 *   extern crate NAME ;
 *   extern crate NAME as ALIAS ;
 * NAME is an identifier or `self`; ALIAS is an identifier or `_`.
 *
 * On any failure skip_after_semicolon () is called for recovery and nullptr
 * is returned. */
template
std::unique_ptr
Parser::parse_extern_crate (AST::Visibility vis,
                            AST::AttrVec outer_attrs)
{
  location_t locus = lexer.peek_token ()->get_locus ();
  if (!skip_token (EXTERN_KW))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  if (!skip_token (CRATE))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  /* parse crate reference name - this has its own syntactical rule in reference
   * but seems to not be used elsewhere, so i'm putting it here */
  const_TokenPtr crate_name_tok = lexer.peek_token ();
  std::string crate_name;

  switch (crate_name_tok->get_id ())
    {
    case IDENTIFIER:
      crate_name = crate_name_tok->get_str ();
      lexer.skip_token ();
      break;
    case SELF:
      crate_name = Values::Keywords::SELF;
      lexer.skip_token ();
      break;
    default:
      add_error (
        Error (crate_name_tok->get_locus (),
               "expecting crate name (identifier or %<self%>), found %qs",
               crate_name_tok->get_token_description ()));

      skip_after_semicolon ();
      return nullptr;
    }

  // don't parse as clause if it doesn't exist
  if (lexer.peek_token ()->get_id () == SEMICOLON)
    {
      lexer.skip_token ();

      return std::unique_ptr (
        new AST::ExternCrate (std::move (crate_name), std::move (vis),
                              std::move (outer_attrs), locus));
    }

  /* parse as clause - this also has its own syntactical rule in reference and
   * also seems to not be used elsewhere, so including here again. */
  if (!skip_token (AS))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  const_TokenPtr as_name_tok = lexer.peek_token ();
  std::string as_name;

  switch (as_name_tok->get_id ())
    {
    case IDENTIFIER:
      as_name = as_name_tok->get_str ();
      lexer.skip_token ();
      break;
    case UNDERSCORE:
      as_name = Values::Keywords::UNDERSCORE;
      lexer.skip_token ();
      break;
    default:
      add_error (
        Error (as_name_tok->get_locus (),
               "expecting as clause name (identifier or %<_%>), found %qs",
               as_name_tok->get_token_description ()));

      skip_after_semicolon ();
      return nullptr;
    }

  if (!skip_token (SEMICOLON))
    {
      skip_after_semicolon ();
      return nullptr;
    }

  return std::unique_ptr (
    new AST::ExternCrate (std::move (crate_name), std::move (vis),
                          std::move (outer_attrs), locus, std::move (as_name)));
}
// Parses a use declaration: `use` followed by a use tree and a semicolon.
// On any failure skip_after_semicolon () is called for recovery and nullptr
// is returned.
template
std::unique_ptr
Parser::parse_use_decl (AST::Visibility vis,
                        AST::AttrVec outer_attrs)
{
  location_t locus = lexer.peek_token ()->get_locus ();

  if (skip_token (USE))
    {
      // the use tree is mandatory
      auto use_tree = parse_use_tree ();
      if (use_tree != nullptr)
        {
          if (skip_token (SEMICOLON))
            return std::unique_ptr (
              new AST::UseDeclaration (std::move (use_tree), std::move (vis),
                                       std::move (outer_attrs), locus));
        }
      else
        {
          add_error (Error (lexer.peek_token ()->get_locus (),
                            "could not parse use tree in use declaration"));
        }
    }

  // every failure path ends up here
  skip_after_semicolon ();
  return nullptr;
}
/* Parses a use tree (which can be recursive and is actually a base class).
 * Depending on what follows the optional leading simple path, this builds a
 * UseTreeGlob, a UseTreeList (recursing for every list element) or a
 * UseTreeRebind. Returns nullptr on failure. */
template
std::unique_ptr
Parser::parse_use_tree ()
{
/* potential syntax definitions in attempt to get algorithm:
* Glob:
* <- SimplePath :: *
* <- :: *
* <- *
* Nested tree thing:
* <- SimplePath :: { COMPLICATED_INNER_TREE_THING }
* <- :: { COMPLICATED_INNER_TREE_THING }
* <- { COMPLICATED_INNER_TREE_THING }
* Rebind thing:
* <- SimplePath as IDENTIFIER
* <- SimplePath as _
* <- SimplePath
*/
/* current plan of attack: try to parse SimplePath first - if that fails, the
* tree has to be a glob or a nested tree, so look for an optional leading ::
* and let the token after it decide between the two. If a path was parsed,
* the token after it decides between rebind, plain path, glob and list. */
/* Thus, parsing smaller parts of use tree may require feeding into function
* via parameters (or could handle all in this single function because other
* use tree types aren't recognised as separate in the spec) */
// TODO: I think this function is too complex, probably should split it
// NOTE(review): the two LEFT_CURLY branches below duplicate the same list
// parsing loop - an obvious candidate for a shared helper.
location_t locus = lexer.peek_token ()->get_locus ();
// bool has_path = false;
auto path = parse_simple_path ();
if (!path)
{
// has no path, so must be glob or nested tree UseTree type
bool is_global = false;
// check for global scope resolution operator
if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
{
lexer.skip_token ();
is_global = true;
}
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case ASTERISK:
// glob UseTree type
lexer.skip_token ();
// there is no path here, so the glob node gets an empty SimplePath
if (is_global)
return std::unique_ptr (
new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL,
AST::SimplePath::create_empty (), locus));
else
return std::unique_ptr (
new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH,
AST::SimplePath::create_empty (), locus));
case LEFT_CURLY:
{
// nested tree UseTree type
lexer.skip_token ();
std::vector> use_trees;
// note: this `t` shadows the one declared before the switch
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != RIGHT_CURLY)
{
std::unique_ptr use_tree = parse_use_tree ();
if (use_tree == nullptr)
{
// stop collecting; the closing-curly check below decides what
// happens next
break;
}
use_trees.push_back (std::move (use_tree));
// elements are comma separated; a trailing comma is accepted
if (lexer.peek_token ()->get_id () != COMMA)
break;
lexer.skip_token ();
t = lexer.peek_token ();
}
// skip end curly delimiter
if (!skip_token (RIGHT_CURLY))
{
// skip after somewhere?
return nullptr;
}
if (is_global)
return std::unique_ptr (
new AST::UseTreeList (AST::UseTreeList::GLOBAL,
AST::SimplePath::create_empty (),
std::move (use_trees), locus));
else
return std::unique_ptr (
new AST::UseTreeList (AST::UseTreeList::NO_PATH,
AST::SimplePath::create_empty (),
std::move (use_trees), locus));
}
case AS:
// this is not allowed
add_error (Error (
t->get_locus (),
"use declaration with rebind % requires a valid simple path - "
"none found"));
skip_after_semicolon ();
return nullptr;
default:
add_error (Error (t->get_locus (),
"unexpected token %qs in use tree with "
"no valid simple path (i.e. list"
" or glob use tree)",
t->get_token_description ()));
skip_after_semicolon ();
return nullptr;
}
}
else
{
// a path was parsed; the next token selects the kind of tree
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case AS:
{
// rebind UseTree type
lexer.skip_token ();
// note: this `t` shadows the outer one and is the token after `as`
const_TokenPtr t = lexer.peek_token ();
switch (t->get_id ())
{
case IDENTIFIER:
// skip lexer token
lexer.skip_token ();
return std::unique_ptr (
new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER,
std::move (path.value ()), locus, t));
case UNDERSCORE:
// skip lexer token
lexer.skip_token ();
return std::unique_ptr (
new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD,
std::move (path.value ()), locus,
{Values::Keywords::UNDERSCORE,
t->get_locus ()}));
default:
add_error (Error (
t->get_locus (),
"unexpected token %qs in use tree with as clause - expected "
"identifier or %<_%>",
t->get_token_description ()));
skip_after_semicolon ();
return nullptr;
}
}
case SEMICOLON:
// rebind UseTree type without rebinding - path only
// don't skip semicolon - it is consumed by parse_use_decl
// lexer.skip_token();
// deliberate fallthrough: all three tokens end a path-only tree
case COMMA:
case RIGHT_CURLY:
// this may occur in recursive calls - assume it is ok and ignore it
return std::unique_ptr (
new AST::UseTreeRebind (AST::UseTreeRebind::NONE,
std::move (path.value ()), locus));
case SCOPE_RESOLUTION:
// keep going
break;
default:
// NOTE(review): unlike the other error paths there is no
// skip_after_semicolon () here - confirm that this is intended
add_error (Error (t->get_locus (),
"unexpected token %qs in use tree with valid path",
t->get_token_description ()));
return nullptr;
}
// only reached for SCOPE_RESOLUTION: consume the :: that follows the path
skip_token ();
t = lexer.peek_token ();
switch (t->get_id ())
{
case ASTERISK:
// glob UseTree type
lexer.skip_token ();
return std::unique_ptr (
new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
std::move (path.value ()), locus));
case LEFT_CURLY:
{
// nested tree UseTree type
lexer.skip_token ();
std::vector> use_trees;
// TODO: think of better control structure
// note: this `t` shadows the one declared at the top of the else branch
const_TokenPtr t = lexer.peek_token ();
while (t->get_id () != RIGHT_CURLY)
{
std::unique_ptr use_tree = parse_use_tree ();
if (use_tree == nullptr)
{
break;
}
use_trees.push_back (std::move (use_tree));
// elements are comma separated; a trailing comma is accepted
if (lexer.peek_token ()->get_id () != COMMA)
break;
lexer.skip_token ();
t = lexer.peek_token ();
}
// skip end curly delimiter
if (!skip_token (RIGHT_CURLY))
{
// skip after somewhere?
return nullptr;
}
return std::unique_ptr (
new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
std::move (path.value ()),
std::move (use_trees), locus));
}
default:
add_error (Error (t->get_locus (),
"unexpected token %qs in use tree with valid path",
t->get_token_description ()));
// skip_after_semicolon();
return nullptr;
}
}
}
/* Parses a function (not a method): qualifiers, `fn`, name, optional generic
 * parameters, parenthesised parameter list (optionally starting with a self
 * parameter), optional return type, optional where clause and finally either
 * a block body or a terminating semicolon. `vis`, `outer_attrs` and
 * `is_external` are handed to the AST::Function constructor unchanged.
 * Returns nullptr on failure. */
template
std::unique_ptr
Parser::parse_function (AST::Visibility vis,
AST::AttrVec outer_attrs,
bool is_external)
{
location_t locus = lexer.peek_token ()->get_locus ();
// Get qualifiers for function if they exist
auto qualifiers = parse_function_qualifiers ();
if (!qualifiers)
return nullptr;
// NOTE(review): the result of this skip_token is ignored, so a missing `fn`
// does not stop parsing here - confirm callers guarantee the keyword
skip_token (FN_KW);
// Save function name token
const_TokenPtr function_name_tok = expect_token (IDENTIFIER);
if (function_name_tok == nullptr)
{
skip_after_next_block ();
return nullptr;
}
Identifier function_name{function_name_tok};
// parse generic params - if exist
std::vector> generic_params
= parse_generic_params_in_angles ();
if (!skip_token (LEFT_PAREN))
{
Error error (lexer.peek_token ()->get_locus (),
"function declaration missing opening parentheses before "
"parameter list");
add_error (std::move (error));
skip_after_next_block ();
return nullptr;
}
// try a self parameter first; an error of kind NOT_SELF only means that the
// list does not start with one, any other error aborts the function
auto initial_param = parse_self_param ();
if (!initial_param.has_value ()
&& initial_param.error ().kind != Parse::Error::Self::Kind::NOT_SELF)
return nullptr;
// consume the comma separating the self parameter from the other parameters
if (initial_param.has_value () && lexer.peek_token ()->get_id () == COMMA)
skip_token ();
// parse function parameters (only if next token isn't right paren)
std::vector> function_params;
if (lexer.peek_token ()->get_id () != RIGHT_PAREN)
function_params
= parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; });
// the self parameter, if any, becomes the first entry of the parameter list
if (initial_param.has_value ())
function_params.insert (function_params.begin (),
std::move (*initial_param));
if (!skip_token (RIGHT_PAREN))
{
Error error (lexer.peek_token ()->get_locus (),
"function declaration missing closing parentheses after "
"parameter list");
add_error (std::move (error));
skip_after_next_block ();
return nullptr;
}
// parse function return type - if exists
std::unique_ptr return_type = parse_function_return_type ();
// parse where clause - if exists
AST::WhereClause where_clause = parse_where_clause ();
// a semicolon instead of a block leaves the body empty (tl::nullopt)
tl::optional> body = tl::nullopt;
if (lexer.peek_token ()->get_id () == SEMICOLON)
lexer.skip_token ();
else
{
auto block_expr = parse_block_expr ();
if (!block_expr)
return nullptr;
body = std::move (block_expr.value ());
}
return std::unique_ptr (new AST::Function (
std::move (function_name), std::move (qualifiers.value ()),
std::move (generic_params), std::move (function_params),
std::move (return_type), std::move (where_clause), std::move (body),
std::move (vis), std::move (outer_attrs), locus, is_external));
}
/* Parses function or method qualifiers (default, const, async, unsafe and
 * extern with its optional ABI string) and turns them into an
 * AST::FunctionQualifiers value. An error from the raw qualifier scan is
 * passed on to the caller unchanged. */
template
tl::expected
Parser::parse_function_qualifiers ()
{
  // the qualifiers are located at the first token that could be one
  location_t start = lexer.peek_token ()->get_locus ();

  auto raw = parse_function_qualifiers_raw (start);
  if (raw)
    return function_qualifiers_from_keywords (start, std::move (raw->first),
                                              std::move (raw->second));

  return tl::unexpected (raw.error ());
}
// Convert a list of already parsed function qualifier tokens into the
// corresponding flags and build the AST::FunctionQualifiers value from them.
//
// ``keywords`` must contain nothing but qualifier tokens; any other token
// ends up in rust_unreachable (). ``abi`` is forwarded as given.
template
tl::expected
Parser::function_qualifiers_from_keywords (
location_t locus, const std::vector keywords, std::string abi)
{
  // every flag starts out in its "qualifier absent" state
  Default is_default = Default::No;
  Async is_async = Async::No;
  Const is_const = Const::No;
  Unsafety safety = Unsafety::Normal;
  bool is_extern = false;

  for (auto keyword : keywords)
    switch (keyword)
      {
      case IDENTIFIER:
        // the only identifier accepted as a qualifier is "default"
        is_default = Default::Yes;
        break;
      case CONST:
        is_const = Const::Yes;
        break;
      case ASYNC:
        is_async = Async::Yes;
        break;
      case UNSAFE:
        safety = Unsafety::Unsafe;
        break;
      case EXTERN_KW:
        is_extern = true;
        break;
      default:
        // a non-qualifier token violates the precondition
        rust_unreachable ();
      }

  return AST::FunctionQualifiers (locus, is_default, is_async, is_const,
                                  safety, is_extern, std::move (abi));
}
// this consumes as many function qualifier tokens as possible while ensuring
// uniqueness.
//
// On success the result is a pair made of the qualifier token ids in the
// order they were found and the ABI string following ``extern`` (left empty
// if no string literal follows). A duplicated qualifier or an invalid
// qualifier order is reported and yields Parse::Error::Node::MALFORMED.
template
tl::expected, std::string>, Parse::Error::Node>
Parser::parse_function_qualifiers_raw (location_t locus)
{
std::vector found_order;
std::string abi;
// this will terminate on duplicates or the first non-qualifier token
while (true)
{
auto token = lexer.peek_token ();
const TokenId token_id = token->get_id ();
// NOTE: this shadows the ``locus`` parameter for the rest of the loop body;
// it is the location of the token currently being examined
location_t locus = lexer.peek_token ()->get_locus ();
switch (token_id)
{
case IDENTIFIER:
if (token->get_str () != Values::WeakKeywords::DEFAULT)
{
// only "default" is valid in this context, so this must
// be a non-qualifier keyword
goto done;
}
// fallthrough
case CONST:
case ASYNC:
case UNSAFE:
found_order.push_back (token_id);
lexer.skip_token ();
break;
case EXTERN_KW:
{
found_order.push_back (token_id);
lexer.skip_token ();
// detect optional abi name
const_TokenPtr next_tok = lexer.peek_token ();
if (next_tok->get_id () == STRING_LITERAL)
{
abi = next_tok->get_str ();
lexer.skip_token ();
}
}
break;
default:
// non-qualifier keyword
goto done;
}
// the token was already recorded above, so a count above one means that it
// had been seen before
if (std::count (found_order.cbegin (), found_order.cend (), token_id) > 1)
{
// qualifiers mustn't appear twice
Error error (locus, "encountered duplicate function qualifier %qs",
token->get_token_description ());
add_error (std::move (error));
return tl::unexpected (
Parse::Error::Node::MALFORMED);
}
}
done:
// out of the loop, so ``locus`` is the function parameter again
if (!ensure_function_qualifier_order (locus, found_order))
return tl::unexpected (Parse::Error::Node::MALFORMED);
return make_pair (found_order, abi);
}
// Check that the function qualifiers in ``found_order`` appear in the order
// default, const, async, unsafe, extern. ``found_order`` must consist of
// function qualifier tokens only.
//
// Returns true if the order is legal. Otherwise an error naming both the
// order found in the source and the order required by Rust syntax is emitted
// and false is returned.
template
bool
Parser::ensure_function_qualifier_order (
location_t locus, const std::vector &found_order)
{
  // position of each qualifier within the canonical sequence
  auto rank_of = [] (const TokenId id) {
    switch (id)
      {
      case IDENTIFIER: // "default"; the only "weak" keyword considered here
        return 1;
      case CONST:
        return 2;
      case ASYNC:
        return 3;
      case UNSAFE:
        return 4;
      case EXTERN_KW:
        return 5;
      default:
        rust_unreachable ();
      }
  };

  // ranks have to increase strictly from one qualifier to the next
  size_t previous_rank = 0;
  for (auto qualifier : found_order)
    {
      const size_t rank = rank_of (qualifier);
      if (rank > previous_rank)
        {
          previous_rank = rank;
          continue;
        }

      emit_function_qualifier_order_error_msg (locus, found_order);
      return false;
    }

  return true;
}
/* Reports an illegal order of function qualifiers. The message lists the
 * qualifiers as written (``found_order``) next to the same qualifiers sorted
 * into the canonical order default, const, async, unsafe, extern. The error
 * location spans from ``locus`` to the token the lexer currently peeks at. */
template
void
Parser::emit_function_qualifier_order_error_msg (
location_t locus, const std::vector &found_order)
{
  std::vector expected_order
    = {IDENTIFIER, CONST, ASYNC, UNSAFE, EXTERN_KW};

  // drop every canonical qualifier that the offending code does not use
  expected_order.erase (
    std::remove_if (expected_order.begin (), expected_order.end (),
                    [&found_order] (TokenId id) {
                      return std::find (found_order.cbegin (),
                                        found_order.cend (), id)
                             == found_order.cend ();
                    }),
    expected_order.end ());

  // space separated rendering of a qualifier list
  auto render = [] (const std::vector &token_ids) {
    std::ostringstream out;
    for (auto id : token_ids)
      {
        if (out.tellp () != 0)
          out << ' ';

        // IDENTIFIER can only stand for the weak keyword "default" here
        if (id == IDENTIFIER)
          out << Values::WeakKeywords::DEFAULT;
        else
          out << token_id_keyword_string (id);
      }
    return out.str ();
  };

  const std::string found_text = render (found_order);
  const std::string expected_text = render (expected_order);

  location_t span
    = make_location (locus, locus, lexer.peek_token ()->get_locus ());
  add_error (
    Error (span,
           "invalid order of function qualifiers; found %qs, expected %qs",
           found_text.c_str (), expected_text.c_str ()));
}
/* Parses generic params inside angle brackets. The whole construct is
 * optional: without a leading left angle nothing is consumed and an empty
 * vector is returned. An empty vector is also returned when the closing
 * angle bracket cannot be skipped. */
template
std::vector>
Parser::parse_generic_params_in_angles ()
{
  // no left angle means no generic params at all
  if (lexer.peek_token ()->get_id () != LEFT_ANGLE)
    return {};

  lexer.skip_token ();
  // DEBUG:
  rust_debug ("skipped left angle in generic param");

  auto params = parse_generic_params (Parse::Utils::is_right_angle_tok);
  // DEBUG:
  rust_debug ("finished parsing actual generic params (i.e. inside angles)");

  if (skip_generics_right_angle ())
    return params;

  // DEBUG
  rust_debug ("failed to skip generics right angle - returning empty "
              "generic params");
  return {};
}
/* Parses a single generic parameter (not including any comma) after its
 * optional outer attributes. The first token after the attributes selects
 * the kind: LIFETIME gives a lifetime param, IDENTIFIER a type param and
 * CONST a const generic param. `is_end_token` is only consulted while
 * parsing the bounds of a lifetime param. Returns nullptr on failure. */
template
template
std::unique_ptr
Parser::parse_generic_param (EndTokenPred is_end_token)
{
auto outer_attrs = parse_outer_attributes ();
std::unique_ptr param;
// first token after the attributes; its location is used for the param
auto token = lexer.peek_token ();
switch (token->get_id ())
{
case LIFETIME:
{
auto lifetime = parse_lifetime (false);
if (!lifetime)
{
Error error (token->get_locus (),
"failed to parse lifetime in generic parameter list");
add_error (std::move (error));
return nullptr;
}
std::vector lifetime_bounds;
if (lexer.peek_token ()->get_id () == COLON)
{
lexer.skip_token ();
// parse required bounds
// the predicate handed over also accepts a comma in addition to the
// caller's end token
lifetime_bounds
= parse_lifetime_bounds ([is_end_token] (TokenId id) {
return is_end_token (id) || id == COMMA;
});
}
param = std::unique_ptr (new AST::LifetimeParam (
std::move (lifetime.value ()), std::move (lifetime_bounds),
std::move (outer_attrs), token->get_locus ()));
break;
}
case IDENTIFIER:
{
// type param: name, optional `: bounds`, optional `= default type`
auto type_ident = token->get_str ();
lexer.skip_token ();
std::vector> type_param_bounds;
if (lexer.peek_token ()->get_id () == COLON)
{
lexer.skip_token ();
// parse optional type param bounds
type_param_bounds = parse_type_param_bounds ();
}
std::unique_ptr type = nullptr;
if (lexer.peek_token ()->get_id () == EQUAL)
{
lexer.skip_token ();
// parse required type
type = parse_type ();
if (!type)
{
Error error (
lexer.peek_token ()->get_locus (),
"failed to parse type in type param in generic params");
add_error (std::move (error));
return nullptr;
}
}
param = std::unique_ptr (
new AST::TypeParam (std::move (type_ident), token->get_locus (),
std::move (type_param_bounds), std::move (type),
std::move (outer_attrs)));
break;
}
case CONST:
{
// const generic param: `const` name `:` type, optional `= default`
lexer.skip_token ();
auto name_token = expect_token (IDENTIFIER);
if (!name_token || !expect_token (COLON))
return nullptr;
auto type = parse_type ();
if (!type)
return nullptr;
// optional default value
tl::optional default_expr = tl::nullopt;
if (lexer.peek_token ()->get_id () == EQUAL)
{
lexer.skip_token ();
// remember the first token of the default value for the error message
auto tok = lexer.peek_token ();
default_expr = parse_generic_arg ();
if (!default_expr)
{
Error error (tok->get_locus (),
"invalid token for start of default value for "
"const generic parameter: expected %, "
"% or %, got %qs",
token_id_to_str (tok->get_id ()));
add_error (std::move (error));
return nullptr;
}
// At this point, we *know* that we are parsing a const
// expression
if (default_expr.value ().get_kind ()
== AST::GenericArg::Kind::Either)
default_expr = default_expr.value ().disambiguate_to_const ();
}
param = std::unique_ptr (
new AST::ConstGenericParam (name_token->get_str (), std::move (type),
default_expr, std::move (outer_attrs),
token->get_locus ()));
break;
}
default:
// FIXME: Can we clean this last call with a method call?
Error error (token->get_locus (),
"unexpected token when parsing generic parameters: %qs",
token->as_string ().c_str ());
add_error (std::move (error));
return nullptr;
}
return param;
}
/* Parse generic (lifetime, type or const) params NOT INSIDE ANGLE BRACKETS!!!
 * Almost always parse_generic_params_in_angles is what is wanted.
 * Params are collected until `is_end_token` accepts the upcoming token or a
 * param fails to parse; a comma after each param is skipped if present.
 * Ordering violations are reported once the loop has finished, and the params
 * parsed so far are returned regardless. */
template
template
std::vector>
Parser::parse_generic_params (EndTokenPred is_end_token)
{
std::vector> generic_params;
/* can't parse lifetime and type params separately due to lookahead issues
* thus, parse them all here */
/* HACK: used to retain attribute data if a lifetime param is tentatively
* parsed but it turns out to be type param */
// NOTE(review): parsed_outer_attr is never read in this function - looks
// like a leftover that could be removed; confirm
AST::Attribute parsed_outer_attr = AST::Attribute::create_empty ();
// Did we parse a type or const param yet (const params set this flag too)
auto type_seen = false;
// Did we parse a const param with a default value yet
auto const_with_default_seen = false;
// Did the user write a lifetime parameter after a type or const one
auto order_error = false;
// Did a type param, a misplaced lifetime param or a const param without a
// default value follow a const param that has a default value
auto const_with_default_order_error = false;
// parse params of any kind until the end token shows up
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
auto param = parse_generic_param (is_end_token);
if (param)
{
if (param->get_kind () == AST::GenericParam::Kind::Type)
{
type_seen = true;
if (const_with_default_seen)
const_with_default_order_error = true;
}
else if (param->get_kind () == AST::GenericParam::Kind::Lifetime
&& type_seen)
{
order_error = true;
if (const_with_default_seen)
const_with_default_order_error = true;
}
else if (param->get_kind () == AST::GenericParam::Kind::Const)
{
type_seen = true;
AST::ConstGenericParam *const_param
= static_cast (param.get ());
if (const_param->has_default_value ())
const_with_default_seen = true;
else if (const_with_default_seen)
const_with_default_order_error = true;
}
generic_params.emplace_back (std::move (param));
maybe_skip_token (COMMA);
}
else
break;
}
// FIXME: Add reordering hint
// both flags are only ever set right before a param is stored, so the
// front () calls below never see an empty vector
if (order_error)
{
Error error (generic_params.front ()->get_locus (),
"invalid order for generic parameters: lifetime parameters "
"must be declared prior to type and const parameters");
add_error (std::move (error));
}
if (const_with_default_order_error)
{
Error error (generic_params.front ()->get_locus (),
"invalid order for generic parameters: generic parameters "
"with a default must be trailing");
add_error (std::move (error));
}
generic_params.shrink_to_fit ();
return generic_params;
}
/* Parses lifetime generic parameters into heap-allocated nodes. Trailing
 * commas are consumed as well. There is no end token check: parsing stops at
 * end of file, at the first thing that is not a lifetime param, or when no
 * comma follows a param. */
template
std::vector>
Parser::parse_lifetime_params ()
{
  std::vector> lifetime_params;

  while (lexer.peek_token ()->get_id () != END_OF_FILE)
    {
      auto parsed = parse_lifetime_param ();
      // not reported as an error: after a trailing comma this is the only
      // way out of the loop
      if (!parsed)
        break;

      lifetime_params.emplace_back (
        new AST::LifetimeParam (std::move (parsed.value ())));

      // a comma (possibly a trailing one) keeps the sequence going
      if (lexer.peek_token ()->get_id () == COMMA)
        {
          lexer.skip_token ();
          continue;
        }
      break;
    }

  lifetime_params.shrink_to_fit ();
  return lifetime_params;
}
/* Parses lifetime generic parameters (pointers). Will also consume any
 * trailing comma. Parsing stops as soon as `is_end_token` accepts the
 * upcoming token or when no comma follows a param. A param that fails to
 * parse before that point is reported as an error and an empty vector is
 * returned, discarding the params parsed so far. */
template
template
std::vector>
Parser::parse_lifetime_params (EndTokenPred is_end_token)
{
  std::vector> lifetime_params;

  while (!is_end_token (lexer.peek_token ()->get_id ()))
    {
      auto lifetime_param = parse_lifetime_param ();
      if (!lifetime_param)
        {
          /* TODO: is it worth throwing away all lifetime params just because
           * one failed? */
          Error error (lexer.peek_token ()->get_locus (),
                       "failed to parse lifetime param in lifetime params");
          add_error (std::move (error));
          return {};
        }

      /* parse_lifetime_param () returns an expected-style wrapper, so the
       * param has to be unwrapped before it is moved into the new node - the
       * wrapper itself is not something a LifetimeParam can be built from.
       * This matches the other parse_lifetime_params* functions. */
      lifetime_params.emplace_back (
        new AST::LifetimeParam (std::move (lifetime_param.value ())));

      if (lexer.peek_token ()->get_id () != COMMA)
        break;

      // skip commas, including trailing commas
      lexer.skip_token ();
    }

  lifetime_params.shrink_to_fit ();
  return lifetime_params;
}
/* Parses lifetime generic parameters and returns them by value (objects, not
 * pointers). Trailing commas are consumed too. No end token is checked.
 * TODO: is this best solution? implements most of the same algorithm.
 * TODO: seems to be unused, remove? */
template
std::vector
Parser::parse_lifetime_params_objs ()
{
  std::vector lifetime_params;

  // no end token is known, so the loop can only be left from the inside
  for (;;)
    {
      auto parsed = parse_lifetime_param ();
      // not an error: after a trailing comma this is the only way to exit
      if (!parsed)
        break;

      lifetime_params.push_back (std::move (parsed.value ()));

      // a comma (possibly a trailing one) keeps the sequence going
      if (lexer.peek_token ()->get_id () == COMMA)
        {
          lexer.skip_token ();
          continue;
        }
      break;
    }

  lifetime_params.shrink_to_fit ();
  return lifetime_params;
}
/* Parses lifetime generic parameters and returns them by value (objects, not
 * pointers). Trailing commas are consumed too. Parsing ends once
 * `is_end_token` accepts the upcoming token or no comma follows a param; a
 * param failing to parse before that is reported and gives an empty vector.
 * TODO: is this best solution? implements most of the same algorithm. */
template
template
std::vector
Parser::parse_lifetime_params_objs (
EndTokenPred is_end_token)
{
  std::vector lifetime_params;

  while (!is_end_token (lexer.peek_token ()->get_id ()))
    {
      auto parsed = parse_lifetime_param ();
      if (!parsed)
        {
          /* TODO: is it worth throwing away all lifetime params just because
           * one failed? */
          add_error (
            Error (lexer.peek_token ()->get_locus (),
                   "failed to parse lifetime param in lifetime params"));
          return {};
        }

      lifetime_params.push_back (std::move (parsed.value ()));

      // a comma (possibly a trailing one) keeps the sequence going
      if (lexer.peek_token ()->get_id () == COMMA)
        {
          lexer.skip_token ();
          continue;
        }
      break;
    }

  lifetime_params.shrink_to_fit ();
  return lifetime_params;
}
/* Parses a sequence of a certain grammar rule in object form (not pointer or
 * smart pointer), delimited by commas and ending when 'is_end_token' is
 * satisfied (templated). Will also consume any trailing comma.
 * `parsing_function` is called without arguments once per element and its
 * result is queried with is_error (); on the first failure `error_msg` is
 * reported and an empty vector is returned.
 * FIXME: this cannot be used due to member function pointer problems (i.e.
 * parsing_function cannot be specified properly) */
template
template
auto
Parser::parse_non_ptr_sequence (
ParseFunction parsing_function, EndTokenPred is_end_token,
std::string error_msg) -> std::vector
{
std::vector params;
while (!is_end_token (lexer.peek_token ()->get_id ()))
{
auto param = parsing_function ();
if (param.is_error ())
{
// TODO: is it worth throwing away all params just because one
// failed?
// error_msg is moved from here, which is fine as the function returns
// right afterwards
Error error (lexer.peek_token ()->get_locus (),
std::move (error_msg));
add_error (std::move (error));
return {};
}
params.push_back (std::move (param));
// without a comma the sequence is over
if (lexer.peek_token ()->get_id () != COMMA)
break;
// skip commas, including trailing commas
lexer.skip_token ();
}
params.shrink_to_fit ();
return params;
}
/* Parses a single lifetime generic parameter (not including comma): optional
 * outer attributes, the lifetime itself and, if a colon follows, its lifetime
 * bounds. If the token after the attributes is not a lifetime, the
 * "not a lifetime param" error is returned and that token is left in place
 * (attributes parsed before it have been consumed). */
template
tl::expected
Parser::parse_lifetime_param ()
{
  // parse outer attributes, which are optional and may not exist
  auto outer_attrs = parse_outer_attributes ();

  // save lifetime token - required
  const_TokenPtr lifetime_tok = lexer.peek_token ();
  if (lifetime_tok->get_id () != LIFETIME)
    {
      // if lifetime is missing, must not be a lifetime param, so return error
      return Parse::Error::LifetimeParam::make_not_a_lifetime_param ();
    }
  lexer.skip_token ();

  AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (),
                          lifetime_tok->get_locus ());

  // parse lifetime bounds, if it exists
  std::vector lifetime_bounds;
  if (lexer.peek_token ()->get_id () == COLON)
    {
      /* Consume the colon first so that the bounds parser starts on the
       * first bound. The other users of parse_lifetime_bounds in this file
       * (generic params, lifetime where clause items) do the same; without
       * it the colon was left in the token stream. */
      lexer.skip_token ();

      // parse lifetime bounds
      lifetime_bounds = parse_lifetime_bounds ();
      // TODO: have end token passed in?
    }

  return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds),
                             std::move (outer_attrs),
                             lifetime_tok->get_locus ());
}
/* Parses type generic parameters, consuming trailing commas as well. There is
 * no end token to look for: the sequence ends at the first param that fails
 * to parse or when no comma follows a param. */
template
std::vector>
Parser::parse_type_params ()
{
  std::vector> type_params;

  // nothing is known about the ending token, so leave the loop from inside
  for (;;)
    {
      auto parsed = parse_type_param ();
      if (!parsed)
        break;

      type_params.push_back (std::move (parsed));

      // a comma (possibly a trailing one) keeps the sequence going
      if (lexer.peek_token ()->get_id () == COMMA)
        {
          lexer.skip_token ();
          continue;
        }
      break;
    }

  type_params.shrink_to_fit ();
  return type_params;
}
/* Parses type generic parameters, consuming trailing commas as well. The
 * sequence ends once `is_end_token` accepts the upcoming token or no comma
 * follows a param. A param that fails to parse before that is reported and
 * results in an empty vector.
 * TODO: this shares most code with parse_lifetime_params - good place to
 * use template (i.e. parse_non_ptr_sequence if doable) */
template
template
std::vector>
Parser::parse_type_params (EndTokenPred is_end_token)
{
  std::vector> type_params;

  while (!is_end_token (lexer.peek_token ()->get_id ()))
    {
      auto parsed = parse_type_param ();
      if (!parsed)
        {
          add_error (Error (lexer.peek_token ()->get_locus (),
                            "failed to parse type param in type params"));
          return {};
        }

      type_params.push_back (std::move (parsed));

      // a comma (possibly a trailing one) keeps the sequence going
      if (lexer.peek_token ()->get_id () == COMMA)
        {
          lexer.skip_token ();
          continue;
        }
      break;
    }

  type_params.shrink_to_fit ();
  return type_params;
}
/* Parses a single type (generic) parameter, not including commas: optional
 * outer attributes, the identifier, optional `: bounds` and an optional
 * `= type` default. Returns nullptr when the identifier is missing (without
 * reporting anything) or when the type after `=` cannot be parsed (after
 * reporting it). May change to return value. */
template
std::unique_ptr
Parser::parse_type_param ()
{
  // outer attributes are optional and may not exist
  auto outer_attrs = parse_outer_attributes ();

  // the identifier is the one required piece of a type param
  const_TokenPtr name_tok = lexer.peek_token ();
  if (name_tok->get_id () != IDENTIFIER)
    return nullptr;

  Identifier ident{name_tok};
  lexer.skip_token ();

  // bounds only exist behind a colon, and may be empty even then
  std::vector> type_param_bounds;
  if (lexer.peek_token ()->get_id () == COLON)
    {
      lexer.skip_token ();
      type_param_bounds = parse_type_param_bounds ();
    }

  // once an equals sign has been seen, the type is mandatory
  std::unique_ptr type = nullptr;
  if (lexer.peek_token ()->get_id () == EQUAL)
    {
      lexer.skip_token ();

      type = parse_type ();
      if (!type)
        {
          add_error (Error (lexer.peek_token ()->get_locus (),
                            "failed to parse type in type param"));
          return nullptr;
        }
    }

  return std::unique_ptr (
    new AST::TypeParam (std::move (ident), name_tok->get_locus (),
                        std::move (type_param_bounds), std::move (type),
                        std::move (outer_attrs)));
}
/* Parses regular (i.e. non-generic) parameters in functions or methods,
 * stopping at the token accepted by `is_end_token`. A trailing comma before
 * the end token is accepted. If the very first parameter cannot be parsed an
 * empty list is returned silently; a later failure is reported and also
 * gives an empty list. */
template
template
std::vector>
Parser::parse_function_params (EndTokenPred is_end_token)
{
  std::vector> params;

  // an immediate end token means there are no parameters at all
  if (is_end_token (lexer.peek_token ()->get_id ()))
    return params;

  auto first = parse_function_param ();
  // TODO: is this an error?
  if (!first)
    return params;
  params.push_back (std::move (first));

  // every further parameter is introduced by a comma
  while (lexer.peek_token ()->get_id () == COMMA)
    {
      lexer.skip_token ();

      // TODO: strictly speaking, shouldn't there be no trailing comma?
      if (is_end_token (lexer.peek_token ()->get_id ()))
        break;

      // not at the end token, so a parameter has to follow the comma
      auto next = parse_function_param ();
      if (!next)
        {
          add_error (
            Error (lexer.peek_token ()->get_locus (),
                   "failed to parse function param (in function params)"));
          // skip somewhere?
          return {};
        }
      params.push_back (std::move (next));
    }

  params.shrink_to_fit ();
  return params;
}
/* Parses a single regular (i.e. non-generic) parameter in a function or
 * method. Three shapes are handled after the optional outer attributes: a
 * lone `...` (unnamed variadic), `pattern: ...` (named variadic) and
 * `pattern: type`. Returns nullptr if the pattern, the colon or the type is
 * missing. */
template
std::unique_ptr
Parser::parse_function_param ()
{
  // outer attributes are optional
  AST::AttrVec outer_attrs = parse_outer_attributes ();

  // TODO: should saved location be at start of outer attributes or pattern?
  location_t locus = lexer.peek_token ()->get_locus ();

  // unnamed variadic: nothing but the ellipsis
  if (lexer.peek_token ()->get_id () == ELLIPSIS)
    {
      lexer.skip_token ();
      return std::make_unique (
        AST::VariadicParam (std::move (outer_attrs), locus));
    }

  // pattern and colon are both required; the colon is only looked for when
  // a pattern was found
  auto pattern = parse_pattern ();
  if (!pattern || !skip_token (COLON))
    return nullptr;

  // named variadic: the ellipsis takes the place of the type
  if (lexer.peek_token ()->get_id () == ELLIPSIS)
    {
      lexer.skip_token ();
      return std::make_unique (
        AST::VariadicParam (std::move (pattern), std::move (outer_attrs),
                            locus));
    }

  auto type = parse_type ();
  if (!type)
    return nullptr;

  return std::make_unique (
    AST::FunctionParam (std::move (pattern), std::move (type),
                        std::move (outer_attrs), locus));
}
/* Parses the return type of a function or method. Without a RETURN_TYPE
 * token there is no return type: nothing is consumed and nullptr is
 * returned. Otherwise that token is skipped and whatever parse_type ()
 * produces is handed back. */
template
std::unique_ptr
Parser::parse_function_return_type ()
{
  if (lexer.peek_token ()->get_id () == RETURN_TYPE)
    {
      // the return type marker is there, so a type has to follow
      lexer.skip_token ();
      return parse_type ();
    }

  return nullptr;
}
/* Parses a "where clause" (in a function, struct, method, etc.). A missing
 * `where` keyword is not an error: WhereClause::create_empty () is returned,
 * which can be detected through WhereClause::is_empty (). The same empty
 * clause is returned, after reporting an error, when one of the items fails
 * to parse. */
template
AST::WhereClause
Parser::parse_where_clause ()
{
  // no `where` keyword, no where clause
  if (lexer.peek_token ()->get_id () != WHERE)
    return AST::WhereClause::create_empty ();
  lexer.skip_token ();

  /* the items are not a separate rule in the reference, so they are parsed
   * right here */
  std::vector> where_clause_items;

  // for-lifetimes written directly after `where` are passed to every item
  std::vector for_lifetimes;
  if (lexer.peek_token ()->get_id () == FOR)
    for_lifetimes = parse_for_lifetimes ();

  /* HACK: in all current uses a where clause is followed by a left curly, a
   * semicolon or an equals sign */
  auto ends_clause = [] (TokenId id) {
    return id == LEFT_CURLY || id == SEMICOLON || id == EQUAL;
  };

  for (const_TokenPtr t = lexer.peek_token (); !ends_clause (t->get_id ());
       t = lexer.peek_token ())
    {
      auto item = parse_where_clause_item (for_lifetimes);
      if (!item)
        {
          // report at the token the failed item started with
          add_error (
            Error (t->get_locus (), "failed to parse where clause item"));
          return AST::WhereClause::create_empty ();
        }
      where_clause_items.push_back (std::move (item));

      // items are comma separated; without a comma the clause is over
      if (lexer.peek_token ()->get_id () != COMMA)
        break;
      lexer.skip_token ();
    }

  where_clause_items.shrink_to_fit ();
  return AST::WhereClause (std::move (where_clause_items));
}
/* Parses a where clause item (lifetime or type bound). Does not parse any
 * commas. The two kinds are told apart by the first token alone: a lifetime
 * token starts a lifetime item, anything else is treated as a type bound
 * item, which also receives `outer_for_lifetimes`. */
template
std::unique_ptr
Parser::parse_where_clause_item (
const std::vector &outer_for_lifetimes)
{
  // crude but sufficient: only a lifetime item can start with a lifetime
  if (lexer.peek_token ()->get_id () == LIFETIME)
    return parse_lifetime_where_clause_item ();

  return parse_type_bound_where_clause_item (outer_for_lifetimes);
}
/* Parses a lifetime where clause item: a lifetime, a colon and the lifetime
 * bounds. Returns nullptr if the lifetime or the colon is missing. The item
 * takes its location from the lifetime. */
template
std::unique_ptr
Parser::parse_lifetime_where_clause_item ()
{
  auto parsed = parse_lifetime (false);
  // TODO: error here?
  if (!parsed)
    return nullptr;
  auto lifetime = parsed.value ();

  // TODO: skip after somewhere
  if (!skip_token (COLON))
    return nullptr;

  // TODO: have end token passed in?
  auto bounds = parse_lifetime_bounds ();

  // read the location before the lifetime is moved into the new item
  location_t locus = lifetime.get_locus ();
  return std::unique_ptr (
    new AST::LifetimeWhereClauseItem (std::move (lifetime),
                                      std::move (bounds), locus));
}
/* Parses a type bound where clause item: a type, a colon, an optional
 * `for<...>` clause and then any type param bounds.
 * outer_for_lifetimes holds lifetimes already parsed by the caller; any
 * lifetimes from a `for<...>` found after the colon are appended to a copy
 * of them. Returns nullptr if the type or the colon cannot be parsed. */
template
std::unique_ptr
Parser::parse_type_bound_where_clause_item (
  const std::vector &outer_for_lifetimes)
{
  /* Record the location before consuming anything, so that the item points
   * at its own first token rather than at whatever follows it. */
  location_t locus = lexer.peek_token ()->get_locus ();

  std::unique_ptr type = parse_type ();
  if (type == nullptr)
    return nullptr;

  if (!skip_token (COLON))
    {
      // TODO: skip after somewhere
      return nullptr;
    }

  // only copy the caller's lifetimes once the item is known to be viable
  std::vector for_lifetimes = outer_for_lifetimes;
  if (lexer.peek_token ()->get_id () == FOR)
    {
      auto for_lifetimes_inner = parse_for_lifetimes ();
      for_lifetimes.insert (for_lifetimes.end (), for_lifetimes_inner.begin (),
                            for_lifetimes_inner.end ());
    }

  // parse type param bounds if they exist
  std::vector> type_param_bounds
    = parse_type_param_bounds ();

  return std::unique_ptr (
    new AST::TypeBoundWhereClauseItem (std::move (for_lifetimes),
                                       std::move (type),
                                       std::move (type_param_bounds), locus));
}
/* Parses a `for<...>` lifetimes clause, including the `for` keyword and the
 * angle brackets. Returns the lifetime params that were parsed; the vector
 * is empty when the `for` keyword or the opening '<' is missing. */
template
std::vector
Parser::parse_for_lifetimes ()
{
  std::vector params;

  // skip after somewhere?
  if (!skip_token (FOR) || !skip_token (LEFT_ANGLE))
    return params;

  /* cannot specify end token due to parsing problems with '>' tokens being
   * nested */
  params = parse_lifetime_params_objs (Parse::Utils::is_right_angle_tok);

  /* Failing to skip the closing angle is only logged (and it does get
   * hit); whatever was parsed is returned regardless. */
  if (!skip_generics_right_angle ())
    rust_debug ("failed to skip generics right angle after (supposedly) "
                "finished parsing where clause items");

  // skip after somewhere?
  return params;
}
/* Parses a '+'-separated list of type parameter bounds, as used in where
 * clauses and generic arguments. Returns an empty vector when not even a
 * first bound can be parsed. */
template
std::vector>
Parser::parse_type_param_bounds ()
{
  std::vector> type_param_bounds;

  /* error? type param bounds must have at least one term, but are bounds
   * optional? */
  auto bound = parse_type_param_bound ();
  while (bound != nullptr)
    {
      type_param_bounds.push_back (std::move (bound));

      if (lexer.peek_token ()->get_id () != PLUS)
        {
          // no further '+', so the list is complete
          type_param_bounds.shrink_to_fit ();
          return type_param_bounds;
        }
      lexer.skip_token ();

      /* a null bound after a '+' is not an error, as a trailing plus is
       * allowed - it just ends the loop */
      bound = parse_type_param_bound ();
    }

  return type_param_bounds;
}
/* Parses a '+'-separated list of type parameter bounds, as used in where
 * clauses and generic arguments. is_end_token is called with the id of the
 * token following each '+'; when it returns true the '+' is taken to be a
 * trailing one and parsing stops. If a bound after a '+' fails to parse, an
 * error is recorded and an empty vector is returned. */
template
template
std::vector>
Parser::parse_type_param_bounds (EndTokenPred is_end_token)
{
  std::vector> type_param_bounds;

  /* error? type param bounds must have at least one term, but are bounds
   * optional? */
  auto first_bound = parse_type_param_bound ();
  if (!first_bound)
    return type_param_bounds;
  type_param_bounds.push_back (std::move (first_bound));

  for (;;)
    {
      if (lexer.peek_token ()->get_id () != PLUS)
        break;
      lexer.skip_token ();

      // a '+' directly followed by an end token is a trailing plus
      if (is_end_token (lexer.peek_token ()->get_id ()))
        break;

      auto next_bound = parse_type_param_bound ();
      if (!next_bound)
        {
          // TODO how wise is it to ditch all bounds if only one failed?
          Error bound_error (
            lexer.peek_token ()->get_locus (),
            "failed to parse type param bound in type param bounds");
          add_error (std::move (bound_error));

          return {};
        }
      type_param_bounds.push_back (std::move (next_bound));
    }

  type_param_bounds.shrink_to_fit ();
  return type_param_bounds;
}
/* Parses one type parameter bound of a where clause or generic argument,
 * which is either a lifetime or a trait bound. The '+' separating bounds is
 * not consumed here. Returns nullptr, without reporting an error, when the
 * next token starts neither. */
template
std::unique_ptr
Parser::parse_type_param_bound ()
{
  // the kind of bound is decided from the next token alone
  const auto next_id = lexer.peek_token ()->get_id ();

  if (next_id == LIFETIME)
    return std::unique_ptr (
      new AST::Lifetime (parse_lifetime (false).value ()));

  switch (next_id)
    {
    // every one of these tokens is handed to the trait bound parser
    case LEFT_PAREN:
    case QUESTION_MARK:
    case FOR:
    case IDENTIFIER:
    case SUPER:
    case SELF:
    case SELF_ALIAS:
    case CRATE:
    case DOLLAR_SIGN:
    case SCOPE_RESOLUTION:
      return parse_trait_bound ();
    default:
      // don't error - assume this is fine TODO
      return nullptr;
    }
}
// Parses a trait bound type param bound.
template
std::unique_ptr
Parser::parse_trait_bound ()
{
bool has_parens = false;
bool has_question_mark = false;
location_t locus = lexer.peek_token ()->get_locus ();
/* parse optional `for lifetimes`. */
std::vector for_lifetimes;
if (lexer.peek_token ()->get_id () == FOR)
for_lifetimes = parse_for_lifetimes ();
// handle trait bound being in parentheses
if (lexer.peek_token ()->get_id () == LEFT_PAREN)
{
has_parens = true;
lexer.skip_token ();
}
// handle having question mark (optional)
if (lexer.peek_token ()->get_id () == QUESTION_MARK)
{
has_question_mark = true;
lexer.skip_token ();
}
// handle TypePath
AST::TypePath type_path = parse_type_path ();
if (type_path.is_error ())
return nullptr;
// handle closing parentheses
if (has_parens)
{
if (!skip_token (RIGHT_PAREN))
{
return nullptr;
}
}
return std::unique_ptr