// Copyright (C) 2020-2025 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#include "rust-macro-expand.h"
#include "optional.h"
#include "rust-ast-fragment.h"
#include "rust-macro-substitute-ctx.h"
#include "rust-ast-full.h"
#include "rust-ast-visitor.h"
#include "rust-diagnostics.h"
#include "rust-parse.h"
#include "rust-cfg-strip.h"
#include "rust-early-name-resolver.h"
#include "rust-session-manager.h"
#include "rust-proc-macro.h"
namespace Rust {
/**
 * Expand an invocation of a declarative macro.
 *
 * Each rule returned by `rules_def.get_rules ()` is tried in order against the
 * invocation's delimited token tree. The first rule whose matcher accepts the
 * tokens is handed to `transcribe_rule`, together with the fragments captured
 * while matching. If no rule matches, an error is reported at the invocation
 * (with the definition as a secondary range) and an error fragment is returned.
 *
 * @param invoc_locus Location used when reporting a failed match
 * @param invoc Invocation data holding the token tree to match
 * @param rules_def Macro definition whose rules are tried in order
 * @param semicolon Invocation kind, forwarded untouched to `transcribe_rule`
 */
AST::Fragment
MacroExpander::expand_decl_macro (location_t invoc_locus,
AST::MacroInvocData &invoc,
AST::MacroRulesDefinition &rules_def,
AST::InvocKind semicolon)
{
// ensure that both invocation and rules are in a valid state
rust_assert (!invoc.is_marked_for_strip ());
rust_assert (!rules_def.is_marked_for_strip ());
rust_assert (rules_def.get_macro_rules ().size () > 0);
/* probably something here about parsing invoc and rules def token trees to
 * token stream. if not, how would parser handle the captures of exprs and
 * stuff? on the other hand, token trees may be kind of useful in rules def
 * as creating a point where recursion can occur (like having
 * "compare_macro_match" and then it calling itself when it finds
 * delimiters)
 */
/* find matching rule to invoc token tree, based on macro rule's matcher. if
 * none exist, error.
 * - specifically, check each matcher in order. if one fails to match, move
 * onto next. */
/* TODO: does doing this require parsing expressions and whatever in the
 * invoc? if so, might as well save the results if referenced using $ or
 * whatever. If not, do another pass saving them. Except this is probably
 * useless as different rules could have different starting points for exprs
 * or whatever. Decision trees could avoid this, but they have their own
 * issues. */
/* TODO: will need to modify the parser so that it can essentially "catch"
 * errors - maybe "try_parse_expr" or whatever methods. */
// this technically creates a back-tracking parser - this will be the
// implementation style
/* then, after results are saved, generate the macro output from the
 * transcriber token tree. if i understand this correctly, the macro
 * invocation gets replaced by the transcriber tokens, except with
 * substitutions made (e.g. for $i variables) */
/* TODO: it is probably better to modify AST::Token to store a pointer to a
 * Lexer::Token (rather than being converted) - i.e. not so much have
 * AST::Token as a Token but rather a TokenContainer (as it is another type
 * of TokenTree). This will prevent re-conversion of Tokens between each
 * type all the time, while still allowing the heterogeneous storage of token
 * trees.
 */
AST::DelimTokenTree &invoc_token_tree = invoc.get_delim_tok_tree ();
// find matching arm
AST::MacroRule *matched_rule = nullptr;
// NOTE(review): the template argument list of this std::map looks like it was
// lost in this copy of the file - confirm against the upstream declaration.
std::map>
matched_fragments;
for (auto &rule : rules_def.get_rules ())
{
// Every attempt gets its own substitution frame. The frame is popped
// whether or not the rule matched, so `matched_fragments` always holds the
// captures of the most recently tried rule.
sub_stack.push ();
bool did_match_rule = try_match_rule (rule, invoc_token_tree);
matched_fragments = sub_stack.pop ();
if (did_match_rule)
{
// // Debugging
// for (auto &kv : matched_fragments)
// rust_debug ("[fragment]: %s (%ld - %s)", kv.first.c_str (),
// kv.second.get_fragments ().size (),
// kv.second.get_kind ()
// == MatchedFragmentContainer::Kind::Repetition
// ? "repetition"
// : "metavar");
matched_rule = &rule;
break;
}
}
if (matched_rule == nullptr)
{
// Point at the invocation and add the macro definition as a second range
rich_location r (line_table, invoc_locus);
r.add_range (rules_def.get_locus ());
rust_error_at (r, "Failed to match any rule within macro");
return AST::Fragment::create_error ();
}
// Build a second map from the same keys to the result of `.get ()` on each
// captured container; `matched_fragments` stays alive until the end of this
// function, i.e. for the whole transcription call below.
// NOTE(review): template arguments of this std::map are also missing here.
std::map matched_fragments_ptr;
for (auto &ent : matched_fragments)
matched_fragments_ptr.emplace (ent.first, ent.second.get ());
return transcribe_rule (*matched_rule, invoc_token_tree,
matched_fragments_ptr, semicolon, peek_context ());
}
/**
 * Expand the macro invocations nested inside `invoc`'s token tree and splice
 * the resulting tokens back in place of the nested invocations.
 *
 * Does nothing when `invoc` has no pending eager invocations. Otherwise the
 * invocation's token tree is replaced by a freshly built one and the list of
 * pending eager invocations is cleared.
 *
 * @param invoc Invocation whose token tree is rewritten in place
 */
void
MacroExpander::expand_eager_invocations (AST::MacroInvocation &invoc)
{
if (invoc.get_pending_eager_invocations ().empty ())
return;
// We have to basically create a new delimited token tree which contains the
// result of one step of expansion. In the case of builtin macros called with
// other macro invocations, such as `concat!("h", 'a', a!())`, we need to
// expand `a!()` before expanding the concat macro.
// This will, ideally, give us a new token tree containing the various
// existing tokens + the result of the expansion of a!().
// To do this, we "parse" the given token tree to find anything that "looks
// like a macro invocation". Then, we get the corresponding macro invocation
// from the `pending_eager_invocations` vector and expand it.
// Because the `pending_eager_invocations` vector is created in the same order
// that the DelimTokenTree is parsed, we know that the first macro invocation
// within the DelimTokenTree corresponds to the first element in
// `pending_eager_invocations`. The idea is thus to:
// 1. Find a macro invocation in the token tree, noting the index of the start
// token and of the end token
// 2. Get its associated invocation in `pending_eager_invocations`
// 3. Expand that element
// 4. Get the token tree associated with that AST fragment
// 5. Replace the original tokens corresponding to the invocation with the new
// tokens from the fragment
// pseudo-code:
//
// i = 0;
// for tok in dtt:
// if tok is identifier && tok->next() is !:
// start = index(tok);
// l_delim = tok->next()->next();
// tok = skip_until_r_delim();
// end = index(tok);
//
// new_tt = expand_eager_invoc(eagers[i++]);
// old_tt[start..end] = new_tt;
// `dtt` is declared with plain `auto`, so it is a by-value copy of the
// invocation's token tree; the original is only replaced at the very end.
auto dtt = invoc.get_invoc_data ().get_delim_tok_tree ();
auto stream = dtt.to_token_stream ();
// NOTE(review): the element type of this vector (and of the containers
// declared below) is missing in this copy of the file - confirm upstream.
std::vector> new_stream;
size_t current_pending = 0;
// we need to create a clone of the delimited token tree as the lexer
// expects ownership of the tokens
std::vector> dtt_clone;
for (auto &tok : stream)
dtt_clone.emplace_back (tok->clone_token ());
MacroInvocLexer lex (std::move (dtt_clone));
Parser parser (lex);
// we want to build a substitution map - basically, associating a `start` and
// `end` index for each of the pending macro invocations
std::map, std::unique_ptr &>
substitution_map;
for (size_t i = 0; i < stream.size (); i++)
{
// FIXME: Can't these offsets be figured out when we actually parse the
// pending_eager_invocation in the first place?
auto invocation = parser.parse_macro_invocation ({});
// if we've managed to parse a macro invocation, we look at the current
// offset and store them in the substitution map. Otherwise, we skip one
// token and try parsing again
// NOTE(review): `current_pending` is not checked against the number of
// pending invocations before indexing - presumably the parse order
// guarantees it stays in range; verify.
if (invocation)
substitution_map.insert (
{{i, parser.get_token_source ().get_offs ()},
invoc.get_pending_eager_invocations ()[current_pending++]});
else
parser.skip_token (stream[i]->get_id ());
}
// Index into `stream` of the first token that has not been copied yet
size_t current_idx = 0;
for (auto kv : substitution_map)
{
auto &to_expand = kv.second;
// Nested invocations are expanded with InvocKind::Expr
expand_invoc (*to_expand, AST::InvocKind::Expr);
auto fragment = take_expanded_fragment ();
auto &new_tokens = fragment.get_tokens ();
auto start = kv.first.first;
auto end = kv.first.second;
// We're now going to re-add the tokens to the invocation's token tree.
// 1. Basically, what we want to do is insert all tokens up until the
// beginning of the macro invocation (start).
// 2. Then, we'll insert all of the tokens resulting from the macro
// expansion: These are in `new_tokens`.
// 3. Finally, we'll do that again from
// the end of macro and go back to 1.
for (size_t i = current_idx; i < start; i++)
new_stream.emplace_back (stream[i]->clone_token ());
for (auto &tok : new_tokens)
new_stream.emplace_back (tok->clone_token ());
current_idx = end;
}
// Once all of that is done, we copy the last remaining tokens from the
// original stream
for (size_t i = current_idx; i < stream.size (); i++)
new_stream.emplace_back (stream[i]->clone_token ());
auto new_dtt
= AST::DelimTokenTree (dtt.get_delim_type (), std::move (new_stream));
// Everything pending has been expanded above: drop the list and install the
// rebuilt token tree on the invocation.
invoc.get_pending_eager_invocations ().clear ();
invoc.get_invoc_data ().set_delim_tok_tree (new_dtt);
}
/**
 * Expand one macro invocation and record the outcome through
 * `set_expanded_fragment`.
 *
 * Returns early, without recording a fragment, when the recursion limit has
 * been reached (an error is emitted) or when no macro definition is mapped to
 * the invocation.
 *
 * @param invoc Invocation to expand
 * @param semicolon Invocation kind, forwarded to the transcriber
 */
void
MacroExpander::expand_invoc (AST::MacroInvocation &invoc,
                             AST::InvocKind semicolon)
{
  if (depth_exceeds_recursion_limit ())
    {
      rust_error_at (invoc.get_locus (), "reached recursion limit");
      return;
    }

  // Builtin macros first get their nested invocations expanded; those eager
  // expansions are always done in expression context.
  const bool is_builtin_invocation
    = invoc.get_kind () == AST::MacroInvocation::InvocKind::Builtin;
  if (is_builtin_invocation)
    {
      push_context (ContextType::EXPR);
      expand_eager_invocations (invoc);
      pop_context ();
    }

  AST::MacroInvocData &data = invoc.get_invoc_data ();

  // Design notes kept from the original author - dispatch that may eventually
  // live here:
  //   - '!' syntax macros: procedural (token-based) or `macro_rules`
  //   - attribute syntax macros: procedural, legacy, or non-macro attributes
  //   - derive macros: token-based or AST-based
  auto result = AST::Fragment::create_error ();
  data.set_expander (this);

  // Without a definition mapped to the invocation there is nothing to expand.
  // The early name resolver will have already emitted an error.
  auto lookup = mappings.lookup_macro_invocation (invoc);
  if (!lookup)
    return;
  auto definition = lookup.value ();

  // Remember what was expanded last, for error reporting in case the
  // recursion limit is reached
  last_invoc = *invoc.clone_macro_invocation_impl ();
  last_def = *definition;

  if (!definition->is_builtin ())
    result = expand_decl_macro (invoc.get_locus (), data, *definition,
                                semicolon);
  else
    result = definition
               ->get_builtin_transcriber () (invoc.get_locus (), data,
                                             semicolon)
               .value_or (AST::Fragment::create_empty ());

  set_expanded_fragment (std::move (result));
}
void
MacroExpander::expand_crate ()
{
NodeId scope_node_id = crate.get_node_id ();
resolver->get_macro_scope ().push (scope_node_id);
/* fill macro/decorator map from init list? not sure where init list comes
* from? */
// TODO: does cfg apply for inner attributes? research.
// the apparent answer (from playground test) is yes
push_context (ContextType::ITEM);
// expand attributes recursively and strip items if required
// AttrVisitor attr_visitor (*this);
auto &items = crate.items;
for (auto it = items.begin (); it != items.end ();)
{
auto &item = *it;
auto fragment = take_expanded_fragment ();
if (fragment.should_expand ())
{
// Remove the current expanded invocation
it = items.erase (it);
for (auto &node : fragment.get_nodes ())
{
it = items.insert (it, node.take_item ());
it++;
}
}
else if (item->is_marked_for_strip ())
it = items.erase (it);
else
it++;
}
pop_context ();
// TODO: should recursive attribute and macro expansion be done in the same
// transversal? Or in separate ones like currently?
// expand module tree recursively
// post-process
// extract exported macros?
}
/**
 * Tell whether the current expansion depth has reached the configured
 * recursion limit (a depth equal to the limit already counts).
 */
bool
MacroExpander::depth_exceeds_recursion_limit () const
{
  const bool limit_reached = expansion_depth >= cfg.recursion_limit;
  return limit_reached;
}
/**
 * Check whether a macro rule's matcher accepts the whole invocation token
 * tree.
 *
 * @param match_rule Rule whose matcher is tried
 * @param invoc_token_tree Tokens of the invocation
 * @return true if the matcher matched and the next token is END_OF_FILE
 */
bool
MacroExpander::try_match_rule (AST::MacroRule &match_rule,
                               AST::DelimTokenTree &invoc_token_tree)
{
  MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
  Parser parser (lex);
  AST::MacroMatcher &rule_matcher = match_rule.get_matcher ();

  // The depth counter is only raised for the duration of the matching call
  expansion_depth++;
  const bool matcher_accepted
    = match_matcher (parser, rule_matcher, false, false);
  expansion_depth--;

  if (!matcher_accepted)
    return false;

  // A rule only matches if it consumed every token of the invocation
  return parser.skip_token (END_OF_FILE);
}
/**
 * Try to parse one macro fragment of the kind requested by `fragment`'s
 * fragment specifier. The parsed node itself is discarded; only the parser's
 * error state matters.
 *
 * @param parser Parser positioned at the start of the candidate fragment
 * @param fragment Fragment matcher giving the expected fragment kind
 * @return false for an invalid specifier, otherwise whether the parser holds
 * no errors once the parse attempt is done
 */
bool
MacroExpander::match_fragment (Parser &parser,
                               AST::MacroMatchFragment &fragment)
{
  const auto kind = fragment.get_frag_spec ().get_kind ();

  switch (kind)
    {
    // an invalid specifier can never match
    case AST::MacroFragSpec::INVALID:
      return false;

    case AST::MacroFragSpec::EXPR:
      parser.parse_expr ();
      break;

    case AST::MacroFragSpec::BLOCK:
      parser.parse_block_expr ();
      break;

    case AST::MacroFragSpec::IDENT:
      parser.parse_identifier_or_keyword_token ();
      break;

    case AST::MacroFragSpec::LITERAL:
      parser.parse_literal_expr ();
      break;

    case AST::MacroFragSpec::ITEM:
      parser.parse_item (false);
      break;

    case AST::MacroFragSpec::TY:
      parser.parse_type ();
      break;

    case AST::MacroFragSpec::PAT:
      parser.parse_pattern ();
      break;

    case AST::MacroFragSpec::PATH:
      parser.parse_path_in_expression ();
      break;

    case AST::MacroFragSpec::VIS:
      parser.parse_visibility ();
      break;

    case AST::MacroFragSpec::STMT:
      {
        // statements are parsed with `consume_semi` switched off
        auto stmt_restrictions = ParseRestrictions ();
        stmt_restrictions.consume_semi = false;
        parser.parse_stmt (stmt_restrictions);
      }
      break;

    case AST::MacroFragSpec::LIFETIME:
      parser.parse_lifetime_params ();
      break;

    // is meta attributes?
    case AST::MacroFragSpec::META:
      parser.parse_attribute_body ();
      break;

    case AST::MacroFragSpec::TT:
      parser.parse_token_tree ();
      break;
    }

  // The fragment matches if parsing it left no error behind
  const bool parsed_cleanly = !parser.has_errors ();
  return parsed_cleanly;
}
/**
 * Match a delimited macro matcher against the parser's upcoming tokens.
 *
 * The current token must be an opening delimiter. When `match_delim` is set,
 * it must also be the delimiter kind declared by `matcher`. Every sub-match
 * of the matcher is then matched in order, and finally the closing delimiter
 * corresponding to the opening token must be present.
 *
 * @param parser Parser over the invocation tokens
 * @param matcher Matcher to apply
 * @param in_repetition Only forwarded to nested matchers in this function
 * @param match_delim Whether the opening delimiter kind must equal the
 * matcher's delimiter kind
 * @return true if the entire matcher matched; false on the first failure or
 * when the recursion limit is reached (an error is emitted in that case)
 */
bool
MacroExpander::match_matcher (Parser &parser,
AST::MacroMatcher &matcher, bool in_repetition,
bool match_delim)
{
if (depth_exceeds_recursion_limit ())
{
rust_error_at (matcher.get_match_locus (), "reached recursion limit");
return false;
}
// Kept around: its id also decides which closing delimiter is expected
auto delimiter = parser.peek_current_token ();
// Accepts any delimiter kind when `match_delim` is false
auto check_delim = [&matcher, match_delim] (AST::DelimType delim) {
return !match_delim || matcher.get_delim_type () == delim;
};
// this is used so we can check that we delimit the stream correctly.
switch (delimiter->get_id ())
{
case LEFT_PAREN: {
if (!check_delim (AST::DelimType::PARENS))
return false;
}
break;
case LEFT_SQUARE: {
if (!check_delim (AST::DelimType::SQUARE))
return false;
}
break;
case LEFT_CURLY: {
if (!check_delim (AST::DelimType::CURLY))
return false;
}
break;
default:
// not an opening delimiter at all
return false;
}
// consume the opening delimiter
parser.skip_token ();
const MacroInvocLexer &source = parser.get_token_source ();
for (auto &match : matcher.get_matches ())
{
// Offset before the sub-match; a matched fragment is recorded as the
// [offs_begin, offs_end) range of the token source
size_t offs_begin = source.get_offs ();
// NOTE(review): the target types of the static_casts in this switch are
// missing in this copy of the file - presumably each is a pointer to the
// type of the variable being initialised; confirm against upstream.
switch (match->get_macro_match_type ())
{
case AST::MacroMatch::MacroMatchType::Fragment: {
AST::MacroMatchFragment *fragment
= static_cast (match.get ());
if (!match_fragment (parser, *fragment))
return false;
// matched fragment get the offset in the token stream
size_t offs_end = source.get_offs ();
sub_stack.insert_metavar (
MatchedFragment (fragment->get_ident ().as_string (), offs_begin,
offs_end));
}
break;
case AST::MacroMatch::MacroMatchType::Tok: {
AST::Token *tok = static_cast (match.get ());
if (!match_token (parser, *tok))
return false;
}
break;
case AST::MacroMatch::MacroMatchType::Repetition: {
AST::MacroMatchRepetition *rep
= static_cast (match.get ());
if (!match_repetition (parser, *rep))
return false;
}
break;
case AST::MacroMatch::MacroMatchType::Matcher: {
AST::MacroMatcher *m
= static_cast (match.get ());
// The depth counter is raised around the nested matcher and restored on
// both the failure and the success path. `match_delim` is not passed
// here, so the default from the declaration applies.
expansion_depth++;
if (!match_matcher (parser, *m, in_repetition))
{
expansion_depth--;
return false;
}
expansion_depth--;
}
break;
}
}
// Require the closing delimiter that pairs with the opening token
switch (delimiter->get_id ())
{
case LEFT_PAREN: {
if (!parser.skip_token (RIGHT_PAREN))
return false;
}
break;
case LEFT_SQUARE: {
if (!parser.skip_token (RIGHT_SQUARE))
return false;
}
break;
case LEFT_CURLY: {
if (!parser.skip_token (RIGHT_CURLY))
return false;
}
break;
default:
// any other token id already returned false in the first switch
rust_unreachable ();
}
return true;
}
/**
 * Match a literal token of a matcher against the parser's input.
 *
 * @param parser Parser over the invocation tokens
 * @param token Token the matcher expects
 * @return the result of asking the parser to skip that token
 */
bool
MacroExpander::match_token (Parser &parser, AST::Token &token)
{
  const bool token_consumed = parser.skip_token (token.get_tok_ptr ());
  return token_consumed;
}
/**
 * Match the body of a repetition as many times as possible and check the
 * number of matches against the given bounds.
 *
 * Matching stops when the next token is a closing delimiter, when the
 * separator (if any) is missing between two matches, when an iteration fails,
 * or when the count goes past `hi_bound`.
 *
 * @param parser Parser over the invocation tokens
 * @param rep Repetition whose sub-matches are applied on each iteration
 * @param match_amount Output: number of successful iterations
 * @param lo_bound Minimum number of iterations required
 * @param hi_bound Maximum number of iterations allowed; 0 means no maximum
 * @return true if `lo_bound <= match_amount` and the upper bound is respected;
 * the parser's errors are cleared in that case
 */
bool
MacroExpander::match_n_matches (Parser &parser,
AST::MacroMatchRepetition &rep,
size_t &match_amount, size_t lo_bound,
size_t hi_bound)
{
match_amount = 0;
auto &matches = rep.get_matches ();
const MacroInvocLexer &source = parser.get_token_source ();
while (true)
{
// If the current token is a closing macro delimiter, break away.
// TODO: Is this correct?
auto t_id = parser.peek_current_token ()->get_id ();
if (t_id == RIGHT_PAREN || t_id == RIGHT_SQUARE || t_id == RIGHT_CURLY)
break;
// Skip parsing a separator on the first match, otherwise consume it.
// If it isn't present, this is an error
if (rep.has_sep () && match_amount > 0)
if (!match_token (parser, *rep.get_sep ()))
break;
// Fresh substitution frame for the metavars captured by this iteration
sub_stack.push ();
bool valid_current_match = false;
// NOTE(review): `valid_current_match` is overwritten by every sub-match and
// the loop does not stop on failure, so only the result of the last
// sub-match is looked at afterwards - confirm this is intended.
for (auto &match : matches)
{
size_t offs_begin = source.get_offs ();
// NOTE(review): the target types of the static_casts in this switch are
// missing in this copy of the file - confirm against upstream.
switch (match->get_macro_match_type ())
{
case AST::MacroMatch::MacroMatchType::Fragment: {
AST::MacroMatchFragment *fragment
= static_cast (match.get ());
valid_current_match = match_fragment (parser, *fragment);
// matched fragment get the offset in the token stream
// (the metavar is inserted even if match_fragment returned false)
size_t offs_end = source.get_offs ();
sub_stack.insert_metavar (
MatchedFragment (fragment->get_ident ().as_string (),
offs_begin, offs_end));
}
break;
case AST::MacroMatch::MacroMatchType::Tok: {
AST::Token *tok = static_cast (match.get ());
valid_current_match = match_token (parser, *tok);
}
break;
case AST::MacroMatch::MacroMatchType::Repetition: {
// this local `rep` shadows the function parameter of the same name
AST::MacroMatchRepetition *rep
= static_cast (match.get ());
valid_current_match = match_repetition (parser, *rep);
}
break;
case AST::MacroMatch::MacroMatchType::Matcher: {
AST::MacroMatcher *m
= static_cast (match.get ());
valid_current_match = match_matcher (parser, *m, true);
}
break;
}
}
// The iteration's frame is popped and merged into the enclosing frame
// before the validity check below, so this also happens for a failed
// iteration.
auto old_stack = sub_stack.pop ();
// nest metavars into repetitions
for (auto &ent : old_stack)
sub_stack.append_fragment (ent.first, std::move (ent.second));
// If we've encountered an error once, stop trying to match more
// repetitions
if (!valid_current_match)
break;
match_amount++;
// Break early if we notice there's too many expressions already
if (hi_bound && match_amount > hi_bound)
break;
}
// Check if the amount of matches we got is valid: Is it more than the lower
// bound and less than the higher bound?
bool did_meet_lo_bound = match_amount >= lo_bound;
bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true;
// If the end-result is valid, then we can clear the parse errors: Since
// repetitions are parsed eagerly, it is okay to fail in some cases
auto res = did_meet_lo_bound && did_meet_hi_bound;
if (res)
parser.clear_errors ();
return res;
}
/*
 * Helper function for defining unmatched repetition metavars.
 *
 * Dispatches on the dynamic kind of `match` to the overload taking the
 * corresponding concrete type. Plain tokens are ignored.
 */
void
MacroExpander::match_repetition_skipped_metavars (AST::MacroMatch &match)
{
// We have to handle zero fragments differently: They will not have been
// "matched" but they are still valid and should be inserted as a special
// case. So we go through the stack map, and for every fragment which doesn't
// exist, insert a zero-matched fragment.
// NOTE(review): the target types of the static_casts below are missing in
// this copy of the file - presumably references to MacroMatchFragment,
// MacroMatchRepetition and MacroMatcher respectively (those are the types the
// other overloads accept); confirm against upstream.
switch (match.get_macro_match_type ())
{
case AST::MacroMatch::MacroMatchType::Fragment:
match_repetition_skipped_metavars (
static_cast (match));
break;
case AST::MacroMatch::MacroMatchType::Repetition:
match_repetition_skipped_metavars (
static_cast (match));
break;
case AST::MacroMatch::MacroMatchType::Matcher:
match_repetition_skipped_metavars (
static_cast (match));
break;
case AST::MacroMatch::MacroMatchType::Tok:
// a plain token does not define any metavar
break;
}
}
/**
 * Register `fragment`'s metavar as matched zero times, unless the current
 * substitution frame already has an entry under that name.
 */
void
MacroExpander::match_repetition_skipped_metavars (
  AST::MacroMatchFragment &fragment)
{
  auto &current_frame = sub_stack.peek ();
  auto existing = current_frame.find (fragment.get_ident ().as_string ());

  // Already captured at least once: nothing to add
  if (existing != current_frame.end ())
    return;

  sub_stack.insert_matches (fragment.get_ident ().as_string (),
                            MatchedFragmentContainer::zero ());
}
/**
 * Define the unmatched metavars of every sub-match found inside a repetition.
 */
void
MacroExpander::match_repetition_skipped_metavars (
  AST::MacroMatchRepetition &rep)
{
  auto &sub_matches = rep.get_matches ();
  for (auto &sub_match : sub_matches)
    {
      match_repetition_skipped_metavars (*sub_match);
    }
}
/**
 * Define the unmatched metavars of every sub-match found inside a nested
 * matcher.
 */
void
MacroExpander::match_repetition_skipped_metavars (AST::MacroMatcher &rep)
{
  auto &sub_matches = rep.get_matches ();
  for (auto &sub_match : sub_matches)
    {
      match_repetition_skipped_metavars (*sub_match);
    }
}
/**
 * Match a macro repetition according to its repetition operator.
 *
 * The operator selects the bounds passed to `match_n_matches`:
 *   - ANY:         no explicit bounds (the declaration's defaults apply)
 *   - ONE_OR_MORE: lower bound of 1
 *   - ZERO_OR_ONE: lower bound of 0, upper bound of 1
 *
 * Whatever the outcome, metavars of the repetition that have no entry in the
 * current substitution frame are then registered as matched zero times.
 *
 * @param parser Parser over the invocation tokens
 * @param rep Repetition to match
 * @return whether the number of matches satisfied the operator's bounds
 */
bool
MacroExpander::match_repetition (Parser &parser,
                                 AST::MacroMatchRepetition &rep)
{
  size_t match_amount = 0;
  bool res = false;

  switch (rep.get_op ())
    {
    case AST::MacroMatchRepetition::MacroRepOp::ANY:
      res = match_n_matches (parser, rep, match_amount);
      break;
    case AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE:
      res = match_n_matches (parser, rep, match_amount, 1);
      break;
    case AST::MacroMatchRepetition::MacroRepOp::ZERO_OR_ONE:
      res = match_n_matches (parser, rep, match_amount, 0, 1);
      break;
    default:
      rust_unreachable ();
    }

  rust_debug_loc (rep.get_match_locus (), "%s matched %lu times",
                  res ? "successfully" : "unsuccessfully",
                  (unsigned long) match_amount);

  // Runs even when the repetition failed to match
  match_repetition_skipped_metavars (rep);
  return res;
}
/**
 * Helper function to refactor calling a parsing function 0 or more times.
 *
 * `parse_fn` is called repeatedly until the parser's current token has the id
 * `delimiter`. If one call yields a node for which `is_error ()` holds, the
 * parser's errors are emitted and an error fragment is returned. Otherwise the
 * result is a fragment built from the collected nodes and the token slice
 * covered while parsing them.
 *
 * @param parser Parser the tokens are read from
 * @param delimiter Id of the token on which parsing should stop
 * @param parse_fn Callback producing one node per call
 */
// NOTE(review): the template arguments of `std::function` (and of the
// `std::vector` below) are missing in this copy of the file - confirm against
// upstream.
static AST::Fragment
parse_many (Parser &parser, TokenId delimiter,
std::function parse_fn)
{
auto &lexer = parser.get_token_source ();
// Offset of the first token handed to `parse_fn`
auto start = lexer.get_offs ();
std::vector nodes;
while (true)
{
if (parser.peek_current_token ()->get_id () == delimiter)
break;
auto node = parse_fn ();
if (node.is_error ())
{
// each error is copied into `err` (plain `auto`) before being emitted
for (auto err : parser.get_errors ())
err.emit ();
return AST::Fragment::create_error ();
}
nodes.emplace_back (std::move (node));
}
// Offset reached once the delimiter was seen; the delimiter itself is not
// consumed here
auto end = lexer.get_offs ();
return AST::Fragment (std::move (nodes), lexer.get_token_slice (start, end));
}
/**
 * Transcribe 0 or more items from a macro invocation
 *
 * @param parser Parser to extract items from
 * @param delimiter Id of the token on which parsing should stop
 * @return fragment produced by `parse_many` over the parsed items
 */
static AST::Fragment
transcribe_many_items (Parser &parser, TokenId &delimiter)
{
  // One call parses a single item and wraps it in an AST node
  auto parse_one_item = [&parser] () {
    auto parsed = parser.parse_item (true);
    return AST::SingleASTNode (std::move (parsed));
  };

  return parse_many (parser, delimiter, parse_one_item);
}
/**
 * Transcribe 0 or more external items from a macro invocation
 *
 * @param parser Parser to extract external items from
 * @param delimiter Id of the token on which parsing should stop
 * @return fragment produced by `parse_many` over the parsed external items
 */
static AST::Fragment
transcribe_many_ext (Parser &parser, TokenId &delimiter)
{
  // One call parses a single external item and wraps it in an AST node
  auto parse_one_external_item = [&parser] () {
    auto parsed = parser.parse_external_item ();
    return AST::SingleASTNode (std::move (parsed));
  };

  return parse_many (parser, delimiter, parse_one_external_item);
}
/**
 * Transcribe 0 or more trait items from a macro invocation
 *
 * @param parser Parser to extract trait items from
 * @param delimiter Id of the token on which parsing should stop
 * @return fragment produced by `parse_many` over the parsed trait items
 */
static AST::Fragment
transcribe_many_trait_items (Parser &parser,
                             TokenId &delimiter)
{
  // One call parses a single trait item and wraps it in an AST node
  auto parse_one_trait_item = [&parser] () {
    auto parsed = parser.parse_trait_item ();
    return AST::SingleASTNode (std::move (parsed));
  };

  return parse_many (parser, delimiter, parse_one_trait_item);
}
/**
 * Transcribe 0 or more impl items from a macro invocation
 *
 * @param parser Parser to extract inherent impl items from
 * @param delimiter Id of the token on which parsing should stop
 * @return fragment produced by `parse_many` over the parsed impl items
 */
static AST::Fragment
transcribe_many_impl_items (Parser &parser, TokenId &delimiter)
{
  // One call parses a single inherent impl item and wraps it in an AST node
  auto parse_one_impl_item = [&parser] () {
    auto parsed = parser.parse_inherent_impl_item ();
    return AST::SingleASTNode (std::move (parsed));
  };

  return parse_many (parser, delimiter, parse_one_impl_item);
}
/**
 * Transcribe 0 or more trait impl items from a macro invocation
 *
 * @param parser Parser to extract trait impl items from
 * @param delimiter Id of the token on which parsing should stop
 * @return fragment produced by `parse_many` over the parsed trait impl items
 */
static AST::Fragment
transcribe_many_trait_impl_items (Parser &parser,
                                  TokenId &delimiter)
{
  // One call parses a single trait impl item and wraps it in an AST node
  auto parse_one_trait_impl_item = [&parser] () {
    auto parsed = parser.parse_trait_impl_item ();
    return AST::SingleASTNode (std::move (parsed));
  };

  return parse_many (parser, delimiter, parse_one_trait_impl_item);
}
/**
 * Transcribe 0 or more statements from a macro invocation
 *
 * @param parser Parser to extract statements from
 * @param delimiter Id of the token on which parsing should stop
 * @param semicolon When set, a semicolon is added to a successfully parsed
 * statement that is directly followed by the delimiter
 */
static AST::Fragment
transcribe_many_stmts (Parser &parser, TokenId delimiter,
                       bool semicolon)
{
  auto stmt_restrictions = ParseRestrictions ();
  stmt_restrictions.allow_close_after_expr_stmt = true;

  auto parse_one_stmt = [&parser, stmt_restrictions, delimiter, semicolon] () {
    auto parsed = parser.parse_stmt (stmt_restrictions);

    // Only the statement sitting right before the delimiter gets the
    // semicolon, and only if it was parsed successfully
    if (semicolon && parsed)
      {
        if (parser.peek_current_token ()->get_id () == delimiter)
          parsed->add_semicolon ();
      }

    return AST::SingleASTNode (std::move (parsed));
  };

  return parse_many (parser, delimiter, parse_one_stmt);
}
/**
 * Transcribe one expression from a macro invocation
 *
 * A semicolon directly following the expression is consumed with a warning.
 *
 * @param parser Parser to extract the expression from
 * @return error fragment if no expression could be parsed, otherwise a
 * fragment holding the expression and the tokens it was parsed from
 */
static AST::Fragment
transcribe_expression (Parser &parser)
{
  auto &token_source = parser.get_token_source ();
  auto first_offs = token_source.get_offs ();

  auto parsed_expr = parser.parse_expr ();
  if (parsed_expr == nullptr)
    return AST::Fragment::create_error ();

  // FIXME: make this an error for some editions
  const bool has_trailing_semicolon
    = parser.peek_current_token ()->get_id () == SEMICOLON;
  if (has_trailing_semicolon)
    {
      rust_warning_at (
        parser.peek_current_token ()->get_locus (), 0,
        "trailing semicolon in macro used in expression context");
      parser.skip_token ();
    }

  // Taken after the optional semicolon, so the slice includes it
  auto last_offs = token_source.get_offs ();
  return AST::Fragment ({std::move (parsed_expr)},
                        token_source.get_token_slice (first_offs, last_offs));
}
/**
 * Transcribe one type from a macro invocation
 *
 * All errors held by the parser once the type has been parsed are emitted;
 * the fragment is built from the parse result regardless.
 *
 * @param parser Parser to extract the type from
 */
static AST::Fragment
transcribe_type (Parser &parser)
{
  auto &token_source = parser.get_token_source ();
  auto first_offs = token_source.get_offs ();

  auto parsed_type = parser.parse_type (true);

  for (auto parse_error : parser.get_errors ())
    {
      parse_error.emit ();
    }

  auto last_offs = token_source.get_offs ();
  return AST::Fragment ({std::move (parsed_type)},
                        token_source.get_token_slice (first_offs, last_offs));
}
/**
 * Pick the transcription routine matching the expansion context.
 *
 * @param ctx Context the macro is being expanded in
 * @param parser Parser over the substituted transcriber tokens
 * @param semicolon Forwarded to statement transcription only
 * @param delimiter Delimiter kind of the invocation (not read in this
 * function)
 * @param last_token_id Id of the token on which multi-node parsing stops
 */
static AST::Fragment
transcribe_context (MacroExpander::ContextType ctx,
                    Parser &parser, bool semicolon,
                    AST::DelimType delimiter, TokenId last_token_id)
{
  // The flow-chart in order to choose a parsing function is as follows:
  //
  // [switch special context]
  //   -- Item   --> parser.parse_item();
  //   -- Trait  --> parser.parse_trait_item();
  //   -- Impl   --> parser.parse_impl_item();
  //   -- Extern --> parser.parse_extern_item();
  //   -- None   --> [has semicolon?]
  //       -- Yes --> parser.parse_stmt();
  //       -- No  --> [switch invocation.delimiter()]
  //           -- { } --> parser.parse_stmt();
  //           -- _   --> parser.parse_expr(); // once!
  //
  // If there is a semicolon OR we are expanding a MacroInvocationSemi, then
  // we can parse multiple items. Otherwise, parse *one* expression
  switch (ctx)
    {
    // contexts producing any number of nodes
    case MacroExpander::ContextType::ITEM:
      return transcribe_many_items (parser, last_token_id);
    case MacroExpander::ContextType::TRAIT:
      return transcribe_many_trait_items (parser, last_token_id);
    case MacroExpander::ContextType::IMPL:
      return transcribe_many_impl_items (parser, last_token_id);
    case MacroExpander::ContextType::TRAIT_IMPL:
      return transcribe_many_trait_impl_items (parser, last_token_id);
    case MacroExpander::ContextType::EXTERN:
      return transcribe_many_ext (parser, last_token_id);
    case MacroExpander::ContextType::STMT:
      return transcribe_many_stmts (parser, last_token_id, semicolon);

    // contexts producing exactly one node
    case MacroExpander::ContextType::TYPE:
      return transcribe_type (parser);
    case MacroExpander::ContextType::EXPR:
      return transcribe_expression (parser);

    default:
      rust_unreachable ();
    }
}
/**
 * Join the string form of every token with single spaces, for debug output.
 *
 * @param tokens Tokens to render
 * @return the joined string, empty when there are no tokens
 */
static std::string
tokens_to_str (std::vector> &tokens)
{
  std::string joined;
  const char *separator = "";

  for (auto &tok : tokens)
    {
      // nothing is put in front of the first token, a space before the others
      joined += separator;
      joined += tok->as_string ();
      separator = " ";
    }

  return joined;
}
/**
 * Transcribe a matched macro rule into an AST fragment.
 *
 * The rule's transcriber tokens are first substituted using the fragments
 * captured during matching. The substituted tokens are then parsed with the
 * routine selected by `ctx`.
 *
 * @param match_rule Rule that matched the invocation
 * @param invoc_token_tree Token tree of the invocation
 * @param matched_fragments Fragments captured while matching, by metavar name
 * @param invoc_kind Kind of invocation; only compared against `Semicoloned`
 * @param ctx Context the macro is being expanded in
 * @return the transcribed fragment, or an error fragment if parsing the
 * substituted tokens produced errors. Leftover tokens after the fragment are
 * reported as an error, but the fragment is still returned.
 */
AST::Fragment
MacroExpander::transcribe_rule (
  AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree,
  std::map &matched_fragments,
  AST::InvocKind invoc_kind, ContextType ctx)
{
  bool semicolon = invoc_kind == AST::InvocKind::Semicoloned;

  // we can manipulate the token tree to substitute the dollar identifiers so
  // that when we call parse its already substituted for us
  AST::MacroTranscriber &transcriber = match_rule.get_transcriber ();
  AST::DelimTokenTree &transcribe_tree = transcriber.get_token_tree ();

  auto invoc_stream = invoc_token_tree.to_token_stream ();
  auto macro_rule_tokens = transcribe_tree.to_token_stream ();

  auto substitute_context
    = SubstituteCtx (invoc_stream, macro_rule_tokens, matched_fragments);
  std::vector> substituted_tokens
    = substitute_context.substitute_tokens ();

  rust_debug ("substituted tokens: %s",
              tokens_to_str (substituted_tokens).c_str ());

  // parse it to a Fragment
  MacroInvocLexer lex (std::move (substituted_tokens));
  Parser parser (lex);

  auto last_token_id = TokenId::RIGHT_CURLY;

  // this is used so we can check that we delimit the stream correctly.
  //
  // The opening delimiter must be consumed unconditionally, so the call to
  // skip_token is kept outside of the assertion: an assertion's operand may
  // not be evaluated at all in builds where assertion checking is disabled.
  bool skipped_opening_delim = false;
  switch (transcribe_tree.get_delim_type ())
    {
    case AST::DelimType::PARENS:
      last_token_id = TokenId::RIGHT_PAREN;
      skipped_opening_delim = parser.skip_token (LEFT_PAREN);
      break;

    case AST::DelimType::CURLY:
      skipped_opening_delim = parser.skip_token (LEFT_CURLY);
      break;

    case AST::DelimType::SQUARE:
      last_token_id = TokenId::RIGHT_SQUARE;
      skipped_opening_delim = parser.skip_token (LEFT_SQUARE);
      break;
    }
  rust_assert (skipped_opening_delim);

  // see https://github.com/Rust-GCC/gccrs/issues/22
  // TL;DR:
  //   - Treat all macro invocations with parentheses, (), or square brackets,
  //   [], as expressions.
  //   - If the macro invocation has curly brackets, {}, it may be parsed as a
  //   statement depending on the context.
  //   - If the macro invocation has a semicolon at the end, it must be parsed
  //   as a statement (either via ExpressionStatement or
  //   MacroInvocationWithSemi)
  auto fragment
    = transcribe_context (ctx, parser, semicolon,
                          invoc_token_tree.get_delim_type (), last_token_id);

  // emit any errors
  if (parser.has_errors ())
    {
      for (auto &err : parser.get_errors ())
        rust_error_at (err.locus, "%s", err.message.c_str ());
      return AST::Fragment::create_error ();
    }

  // are all the tokens used?
  bool did_delimit = parser.skip_token (last_token_id);

  bool reached_end_of_stream = did_delimit && parser.skip_token (END_OF_FILE);
  if (!reached_end_of_stream)
    {
      // FIXME: rustc has some cases it accepts this with a warning due to
      // backwards compatibility.
      const_TokenPtr current_token = parser.peek_current_token ();
      rust_error_at (current_token->get_locus (),
                     "tokens here and after are unparsed");
    }

  return fragment;
}
/**
 * Parse the token stream produced by a procedural macro into AST nodes.
 *
 * The current expansion context (`peek_context ()`) selects the parsing
 * routine: items for ITEM, statements for STMT. Every other context reaches
 * `rust_unreachable ()`.
 *
 * @param ts Token stream to parse
 * @return error fragment if the parser recorded errors, otherwise a fragment
 * holding the parsed nodes and an empty token vector
 */
AST::Fragment
MacroExpander::parse_proc_macro_output (ProcMacro::TokenStream ts)
{
ProcMacroInvocLexer lex (convert (ts));
// NOTE(review): the template arguments of `Parser` and of the vectors in this
// function are missing in this copy of the file - confirm against upstream.
Parser parser (lex);
std::vector nodes;
switch (peek_context ())
{
case ContextType::ITEM:
// parse items until the end of the stream or the first null result
while (lex.peek_token ()->get_id () != END_OF_FILE)
{
auto result = parser.parse_item (false);
if (result == nullptr)
break;
nodes.push_back ({std::move (result)});
}
break;
case ContextType::STMT:
// parse statements until the end of the stream or the first null result
while (lex.peek_token ()->get_id () != END_OF_FILE)
{
auto result = parser.parse_stmt ();
if (result == nullptr)
break;
nodes.push_back ({std::move (result)});
}
break;
// all remaining contexts share the `default` label below
case ContextType::TRAIT:
case ContextType::IMPL:
case ContextType::TRAIT_IMPL:
case ContextType::EXTERN:
case ContextType::TYPE:
case ContextType::EXPR:
default:
rust_unreachable ();
}
if (parser.has_errors ())
return AST::Fragment::create_error ();
else
return {nodes, std::vector> ()};
}
/**
 * Return the fragment held by a single-fragment container.
 * Asserts that `is_single_fragment ()` holds.
 */
// NOTE(review): the target type of the static_cast is missing in this copy of
// the file - presumably a reference to MatchedFragmentContainerMetaVar;
// confirm against upstream.
MatchedFragment &
MatchedFragmentContainer::get_single_fragment ()
{
rust_assert (is_single_fragment ());
return static_cast (*this).get_fragment ();
}
/**
 * Return the fragments held by a container that is not a single fragment.
 * Asserts that `is_single_fragment ()` does not hold.
 */
// NOTE(review): the element type of the returned vector and the target type
// of the static_cast are missing in this copy of the file - the cast is
// presumably to MatchedFragmentContainerRepetition; confirm against upstream.
std::vector> &
MatchedFragmentContainer::get_fragments ()
{
rust_assert (!is_single_fragment ());
return static_cast (*this)
.get_fragments ();
}
/**
 * Add a matched fragment to a container that is not a single fragment.
 * Asserts that `is_single_fragment ()` does not hold, then forwards to the
 * `add_fragment` of the type `*this` is cast to.
 *
 * @param fragment Fragment to add
 */
// NOTE(review): the target type of the static_cast is missing in this copy of
// the file - presumably MatchedFragmentContainerRepetition; confirm upstream.
void
MatchedFragmentContainer::add_fragment (MatchedFragment fragment)
{
rust_assert (!is_single_fragment ());
return static_cast (*this)
.add_fragment (fragment);
}
/**
 * Add an owned, already-built container to a container that is not a single
 * fragment. Asserts that `is_single_fragment ()` does not hold, then moves
 * `fragment` into the `add_fragment` of the type `*this` is cast to.
 *
 * @param fragment Container whose ownership is passed on
 */
// NOTE(review): the pointee type of the unique_ptr and the target type of the
// static_cast are missing in this copy of the file - confirm against upstream.
void
MatchedFragmentContainer::add_fragment (
std::unique_ptr fragment)
{
rust_assert (!is_single_fragment ());
return static_cast (*this)
.add_fragment (std::move (fragment));
}
/**
 * Create a default-constructed MatchedFragmentContainerRepetition and return
 * it in a unique_ptr. Used for metavars of a repetition that were never
 * matched.
 */
// NOTE(review): the pointee type of the unique_ptr is missing in this copy of
// the file - confirm against upstream.
std::unique_ptr
MatchedFragmentContainer::zero ()
{
return std::unique_ptr (
new MatchedFragmentContainerRepetition ());
}
/**
 * Create a MatchedFragmentContainerMetaVar from `fragment` and return it in a
 * unique_ptr.
 *
 * @param fragment Matched fragment to wrap
 */
// NOTE(review): the pointee type of the unique_ptr is missing in this copy of
// the file - confirm against upstream.
std::unique_ptr
MatchedFragmentContainer::metavar (MatchedFragment fragment)
{
return std::unique_ptr (
new MatchedFragmentContainerMetaVar (fragment));
}
} // namespace Rust