Merge #1750

1750: Implement fixed point macro expansion r=CohenArthur a=CohenArthur This is a cleanup of #1606's branch, which also contains the code necessary for performing eager macro expansion in builtin macros. This commit changes our macro expansion system from an eager and recursive macro expansion to a fixed-point-like system. Instead of, when seeing a macro invocation, expanding it and all of the macros within it, we now perform multiple passes of expansion on the entire crate. This, however, leads to a problem. Rust macros are expanded lazily, but Rust builtin macros should be expanded eagerly. Due to this, we must work around the lazy expansion in builtin macros and perform eager expansion for each pass of the fixed-point, before finally expanding the builtin when there are no longer any inner macro invocations. To perform proper macro scoping, the ENR now keeps track of the current scope (`current_scope` member) and resolves macros accordingly. This is done through the use of the `scoped` method, which creates a new scope, runs a specified lambda and then exits the scope. This prevents pushing/popping errors that we've seen happen already in similar contexts. We might think about generalizing it to other classes, providing a `Scoped<EntryFn, ExitFn>` class or similar. Fixes #1795. Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2023-02-13 11:15:24 +0000
committer: GitHub <noreply@github.com> 2023-02-13 11:15:24 +0000
commit: 59d4ee6cc1b14ddc01d062a41bec364e959793c7 (patch)
tree: dc3137f8905d5ad3cebefb6b766859d49944a9eb /gcc/rust/expand/rust-macro-expand.cc
parent: 3a8c8d0d41b57fbb49ab39715b70495d5d1e8dd1 (diff)
parent: f67f5409d2fe4191ab24b5eb634c26306386fb25 (diff)
download: gcc-59d4ee6cc1b14ddc01d062a41bec364e959793c7.zip
gcc-59d4ee6cc1b14ddc01d062a41bec364e959793c7.tar.gz
gcc-59d4ee6cc1b14ddc01d062a41bec364e959793c7.tar.bz2
1 files changed, 142 insertions, 4 deletions
diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc
index 36b3195..597cb6a 100644
--- a/gcc/rust/expand/rust-macro-expand.cc
+++ b/gcc/rust/expand/rust-macro-expand.cc
@@ -111,6 +111,122 @@ MacroExpander::expand_decl_macro (Location invoc_locus,
 }
 
 void
+MacroExpander::expand_eager_invocations (AST::MacroInvocation &invoc)
+{
+  if (invoc.get_pending_eager_invocations ().empty ())
+    return;
+
+  // Purpose: expand the macro invocations pending inside `invoc` and give it
+  // a new delimited token tree holding the result of that one expansion step.
+  // For builtin macros called with other macro invocations, such as
+  // `concat!("h", 'a', a!())`, `a!()` must be expanded before the concat macro.
+  // This will, ideally, give us a new token tree containing the various
+  // existing tokens + the result of the expansion of a!().
+  // To do this, we "parse" the given token tree to find anything that "looks
+  // like a macro invocation". Then, we get the corresponding macro invocation
+  // from the `pending_eager_invocations` vector and expand it.
+  // Because the `pending_eager_invocations` vector is created in the same order
+  // that the DelimTokenTree is parsed, we know that the first macro invocation
+  // within the DelimTokenTree corresponds to the first element in
+  // `pending_eager_invocations`. The idea is thus to:
+  // 1. Find a macro invocation in the token tree, noting the index of the start
+  //    token and of the end token
+  // 2. Get its associated invocation in `pending_eager_invocations`
+  // 3. Expand that element
+  // 4. Get the token tree associated with that AST fragment
+  // 5. Replace the original tokens corresponding to the invocation with the new
+  //    tokens from the fragment
+  // pseudo-code:
+  //
+  // i = 0;
+  // for tok in dtt:
+  //   if tok is identifier && tok->next() is !:
+  //     start = index(tok);
+  //     l_delim = tok->next()->next();
+  //     tok = skip_until_r_delim();
+  //     end = index(tok);
+  //
+  //     new_tt = expand_eager_invoc(eagers[i++]);
+  //     old_tt[start..end] = new_tt;
+
+  auto dtt = invoc.get_invoc_data ().get_delim_tok_tree ();
+  auto stream = dtt.to_token_stream ();
+  std::vector<std::unique_ptr<AST::TokenTree>> new_stream;
+  size_t current_pending = 0;
+
+  // We need to create a clone of the delimited token tree's tokens, as the
+  // lexer expects ownership of the tokens it is given.
+  std::vector<std::unique_ptr<Rust::AST::Token>> dtt_clone;
+  for (auto &tok : stream)
+    dtt_clone.emplace_back (tok->clone_token ());
+
+  MacroInvocLexer lex (std::move (dtt_clone));
+  Parser<MacroInvocLexer> parser (lex);
+
+  // We want to build a substitution map - basically, associating a `start` and
+  // `end` index with each of the pending macro invocations (held by reference).
+  std::map<std::pair<size_t, size_t>, std::unique_ptr<AST::MacroInvocation> &>
+    substitution_map;
+
+  for (size_t i = 0; i < stream.size (); i++)
+    {
+      // FIXME: Can't these offsets be figured out when we actually parse the
+      // pending_eager_invocation in the first place?
+      auto invocation = parser.parse_macro_invocation ({});
+
+      // If a macro invocation was parsed, store {i, lexer offset after the
+      // parse} in the substitution map. Otherwise, skip one token and retry.
+      // NOTE(review): `i` is not moved to that offset on success - confirm.
+      if (invocation)
+	substitution_map.insert (
+	  {{i, parser.get_token_source ().get_offs ()},
+	   invoc.get_pending_eager_invocations ()[current_pending++]});
+      else
+	parser.skip_token (stream[i]->get_id ());
+    }
+
+  size_t current_idx = 0;
+  for (auto kv : substitution_map)
+    {
+      auto &to_expand = kv.second;
+      expand_invoc (*to_expand, false);
+
+      auto fragment = take_expanded_fragment ();
+      auto &new_tokens = fragment.get_tokens ();
+
+      auto start = kv.first.first;
+      auto end = kv.first.second;
+
+      // We're now going to re-add the tokens to the invocation's token tree.
+      // 1. Basically, what we want to do is insert all tokens up until the
+      //    beginning of the macro invocation (start).
+      // 2. Then, we'll insert all of the tokens resulting from the macro
+      //    expansion: These are in `new_tokens`.
+      // 3. Finally, we resume from the end of the macro invocation (end) and
+      //    go back to 1 for the next entry of the substitution map.
+
+      for (size_t i = current_idx; i < start; i++)
+	new_stream.emplace_back (stream[i]->clone_token ());
+
+      for (auto &tok : new_tokens)
+	new_stream.emplace_back (tok->clone_token ());
+
+      current_idx = end;
+    }
+
+  // Once all of that is done, we copy the last remaining tokens (those from
+  // `current_idx` onwards) from the original stream.
+  for (size_t i = current_idx; i < stream.size (); i++)
+    new_stream.emplace_back (stream[i]->clone_token ());
+
+  auto new_dtt
+    = AST::DelimTokenTree (dtt.get_delim_type (), std::move (new_stream));
+
+  invoc.get_pending_eager_invocations ().clear ();
+  invoc.get_invoc_data ().set_delim_tok_tree (new_dtt);
+}
+
+void
 MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon)
 {
   if (depth_exceeds_recursion_limit ())
@@ -119,6 +235,9 @@ MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon)
       return;
     }
 
+  if (invoc.get_kind () == AST::MacroInvocation::InvocKind::Builtin)
+    expand_eager_invocations (invoc);
+
   AST::MacroInvocData &invoc_data = invoc.get_invoc_data ();
 
   // ??
@@ -151,6 +270,11 @@ MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon)
   if (!ok)
     return;
 
+  // We store the last expanded invocation and macro definition for error
+  // reporting in case the recursion limit is reached
+  last_invoc = &invoc;
+  last_def = rules_def;
+
   if (rules_def->is_builtin ())
     fragment
       = rules_def->get_builtin_transcriber () (invoc.get_locus (), invoc_data);
@@ -292,7 +416,7 @@ MacroExpander::expand_crate ()
       // mark for stripping if required
       item->accept_vis (attr_visitor);
 
-      auto fragment = take_expanded_fragment (attr_visitor);
+      auto fragment = take_expanded_fragment ();
       if (fragment.should_expand ())
 	{
 	  // Remove the current expanded invocation
@@ -711,6 +835,9 @@ static AST::Fragment
 parse_many (Parser<MacroInvocLexer> &parser, TokenId &delimiter,
 	    std::function<AST::SingleASTNode ()> parse_fn)
 {
+  auto &lexer = parser.get_token_source ();
+  auto start = lexer.get_offs ();
+
   std::vector<AST::SingleASTNode> nodes;
   while (true)
     {
@@ -728,8 +855,9 @@ parse_many (Parser<MacroInvocLexer> &parser, TokenId &delimiter,
 
       nodes.emplace_back (std::move (node));
     }
+  auto end = lexer.get_offs ();
 
-  return AST::Fragment::complete (std::move (nodes));
+  return AST::Fragment (std::move (nodes), lexer.get_token_slice (start, end));
 }
 
 /**
@@ -838,11 +966,16 @@ transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 static AST::Fragment
 transcribe_expression (Parser<MacroInvocLexer> &parser)
 {
+  auto &lexer = parser.get_token_source ();
+  auto start = lexer.get_offs (); // lexer offset before the expression is parsed
+
   auto expr = parser.parse_expr ();
   if (expr == nullptr)
     return AST::Fragment::create_error ();
 
-  return AST::Fragment::complete ({std::move (expr)});
+  auto end = lexer.get_offs (); // lexer offset after a successful parse
+
+  return AST::Fragment ({std::move (expr)}, lexer.get_token_slice (start, end));
 }
 
 /**
@@ -853,11 +986,16 @@ transcribe_expression (Parser<MacroInvocLexer> &parser)
 static AST::Fragment
 transcribe_type (Parser<MacroInvocLexer> &parser)
 {
+  auto &lexer = parser.get_token_source ();
+  auto start = lexer.get_offs (); // lexer offset before the type is parsed
+
   auto type = parser.parse_type (true);
   for (auto err : parser.get_errors ())
     err.emit_error ();
 
-  return AST::Fragment::complete ({std::move (type)});
+  auto end = lexer.get_offs (); // NOTE(review): `type` is not null-checked before use below - confirm
+
+  return AST::Fragment ({std::move (type)}, lexer.get_token_slice (start, end));
 }
 
 static AST::Fragment
author	bors[bot] <26634292+bors[bot]@users.noreply.github.com>	2023-02-13 11:15:24 +0000
committer	GitHub <noreply@github.com>	2023-02-13 11:15:24 +0000
commit	59d4ee6cc1b14ddc01d062a41bec364e959793c7 (patch)
tree	dc3137f8905d5ad3cebefb6b766859d49944a9eb /gcc/rust/expand/rust-macro-expand.cc
parent	3a8c8d0d41b57fbb49ab39715b70495d5d1e8dd1 (diff)
parent	f67f5409d2fe4191ab24b5eb634c26306386fb25 (diff)
download	gcc-59d4ee6cc1b14ddc01d062a41bec364e959793c7.zip gcc-59d4ee6cc1b14ddc01d062a41bec364e959793c7.tar.gz gcc-59d4ee6cc1b14ddc01d062a41bec364e959793c7.tar.bz2