diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2022-03-01 11:36:13 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-01 11:36:13 +0000 |
commit | e82b59dfc9319d72f891bac099bfa0f46d8b8c99 (patch) | |
tree | a91c15fb63b9543661d892efb5fbe94dbe73ee52 /gcc | |
parent | ed1a4dc33fa78808fefaf020015b2177d7cdc1ce (diff) | |
parent | 27be628911a8df4d83b3770f9c5491bc7b410c2f (diff) | |
download | gcc-e82b59dfc9319d72f891bac099bfa0f46d8b8c99.zip gcc-e82b59dfc9319d72f891bac099bfa0f46d8b8c99.tar.gz gcc-e82b59dfc9319d72f891bac099bfa0f46d8b8c99.tar.bz2 |
Merge #981
981: macro-expand: Add SubstituteCtx class in its own file r=CohenArthur a=CohenArthur
The `MacroExpander` class had multiple static functions which were constantly passing the same parameters around for expansion. This refactor adds a new `SubstituteCtx` class which keeps track of the three common arguments given to the substitute functions, and offers these implementations in a new source file to keep the original expander light.
Closes #957
Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/Make-lang.in | 1 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-expand.cc | 221 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-expand.h | 58 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-substitute-ctx.cc | 206 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-substitute-ctx.h | 80 |
5 files changed, 291 insertions, 275 deletions
diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in index 69943f9..b43f0f3 100644 --- a/gcc/rust/Make-lang.in +++ b/gcc/rust/Make-lang.in @@ -75,6 +75,7 @@ GRS_OBJS = \ rust/rust-compile-resolve-path.o \ rust/rust-macro-expand.o \ rust/rust-macro-invoc-lexer.o \ + rust/rust-macro-substitute-ctx.o \ rust/rust-macro-builtins.o \ rust/rust-hir-full-test.o \ rust/rust-hir-map.o \ diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index 7552e82..a4ed36b 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -17,6 +17,7 @@ // <http://www.gnu.org/licenses/>. #include "rust-macro-expand.h" +#include "rust-macro-substitute-ctx.h" #include "rust-ast-full.h" #include "rust-ast-visitor.h" #include "rust-diagnostics.h" @@ -3773,8 +3774,10 @@ MacroExpander::transcribe_rule ( auto invoc_stream = invoc_token_tree.to_token_stream (); auto macro_rule_tokens = transcribe_tree.to_token_stream (); + auto substitute_context + = SubstituteCtx (invoc_stream, macro_rule_tokens, matched_fragments); std::vector<std::unique_ptr<AST::Token>> substituted_tokens - = substitute_tokens (invoc_stream, macro_rule_tokens, matched_fragments); + = substitute_context.substitute_tokens (); // // handy for debugging // for (auto &tok : substituted_tokens) @@ -3896,220 +3899,4 @@ MacroExpander::transcribe_rule ( return AST::ASTFragment (std::move (nodes)); } - -std::vector<std::unique_ptr<AST::Token>> -MacroExpander::substitute_metavar ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::map<std::string, std::vector<MatchedFragment>> &fragments, - std::unique_ptr<AST::Token> &metavar) -{ - auto metavar_name = metavar->get_str (); - - std::vector<std::unique_ptr<AST::Token>> expanded; - auto it = fragments.find (metavar_name); - if (it == fragments.end ()) - { - // Return a copy of the original token - expanded.push_back (metavar->clone_token ()); - } - else - { - // Replace - // We only care about the vector when 
expanding repetitions. Just access - // the first element of the vector. - // FIXME: Clean this up so it makes more sense - auto &frag = it->second[0]; - for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end; - offs++) - { - auto &tok = input.at (offs); - expanded.push_back (tok->clone_token ()); - } - } - - return expanded; -} - -std::vector<std::unique_ptr<AST::Token>> -MacroExpander::substitute_repetition ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, std::vector<MatchedFragment>> &fragments, - size_t pattern_start, size_t pattern_end) -{ - rust_assert (pattern_end < macro.size ()); - - rust_debug ("pattern start: %lu", pattern_start); - rust_debug ("pattern end: %lu", pattern_end); - - std::vector<std::unique_ptr<AST::Token>> expanded; - - // Find the first fragment and get the amount of repetitions that we should - // perform - size_t repeat_amount = 0; - for (size_t i = pattern_start; i < pattern_end; i++) - { - if (macro.at (i)->get_id () == DOLLAR_SIGN) - { - auto &frag_token = macro.at (i + 1); - if (frag_token->get_id () == IDENTIFIER) - { - auto it = fragments.find (frag_token->get_str ()); - if (it == fragments.end ()) - { - // If the repetition is not anything we know (ie no declared - // metavars, or metavars which aren't present in the - // fragment), we can just error out. No need to paste the - // tokens as if nothing had happened. - rust_error_at (frag_token->get_locus (), - "metavar %s used in repetition does not exist", - frag_token->get_str ().c_str ()); - // FIXME: - return expanded; - } - - // FIXME: Refactor, ugly - repeat_amount = it->second[0].match_amount; - } - } - } - - rust_debug ("repetition amount to use: %lu", repeat_amount); - std::vector<std::unique_ptr<AST::Token>> new_macro; - - // We want to generate a "new macro" to substitute with. 
This new macro - // should contain only the tokens inside the pattern - for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++) - new_macro.emplace_back (macro.at (tok_idx)->clone_token ()); - - // Then, we want to create a subset of the matches so that - // `substitute_tokens()` can only see one fragment per metavar. Let's say we - // have the following user input: (1 145 'h') - // on the following match arm: ($($lit:literal)*) - // which causes the following matches: { "lit": [1, 145, 'h'] } - // - // The pattern (new_macro) is `$lit:literal` - // The first time we expand it, we want $lit to have the following token: 1 - // The second time, 145 - // The third and final time, 'h' - // - // In order to do so we must create "sub maps", which only contain parts of - // the original matches - // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ] - // - // and give them to `substitute_tokens` one by one. - - for (size_t i = 0; i < repeat_amount; i++) - { - std::map<std::string, std::vector<MatchedFragment>> sub_map; - for (auto &kv_match : fragments) - { - std::vector<MatchedFragment> sub_vec; - sub_vec.emplace_back (kv_match.second[i]); - - sub_map.insert ({kv_match.first, sub_vec}); - } - - auto new_tokens = substitute_tokens (input, new_macro, sub_map); - - for (auto &new_token : new_tokens) - expanded.emplace_back (new_token->clone_token ()); - } - - // FIXME: We also need to make sure that all subsequent fragments - // contain the same amount of repetitions as the first one - - return expanded; -} - -std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t> -MacroExpander::substitute_token ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, std::vector<MatchedFragment>> &fragments, - size_t token_idx) -{ - auto &token = macro.at (token_idx); - switch (token->get_id ()) - { - case IDENTIFIER: - rust_debug ("expanding metavar: %s", token->get_str ().c_str ()); - return 
{substitute_metavar (input, fragments, token), 1}; - case LEFT_PAREN: { - // We need to parse up until the closing delimiter and expand this - // fragment->n times. - rust_debug ("expanding repetition"); - std::vector<std::unique_ptr<AST::Token>> repetition_pattern; - size_t pattern_start = token_idx + 1; - size_t pattern_end = pattern_start; - for (; pattern_end < macro.size () - && macro.at (pattern_end)->get_id () != RIGHT_PAREN; - pattern_end++) - ; - - // FIXME: This skips whitespaces... Is that okay?? - // FIXME: Is there any existing parsing function that allows us to parse - // a macro pattern? - - // FIXME: Add error handling in the case we haven't found a matching - // closing delimiter - - // FIXME: We need to parse the repetition token now - - return { - substitute_repetition (input, macro, fragments, pattern_start, - pattern_end), - // + 2 for the opening and closing parentheses which are mandatory - // + 1 for the repetitor (+, *, ?) - pattern_end - pattern_start + 3}; - } - // TODO: We need to check if the $ was alone. In that case, do - // not error out: Simply act as if there was an empty identifier - // with no associated fragment and paste the dollar sign in the - // transcription. Unsure how to do that since we always have at - // least the closing curly brace after an empty $... - default: - rust_error_at (token->get_locus (), - "unexpected token in macro transcribe: expected " - "%<(%> or identifier after %<$%>, got %<%s%>", - get_token_description (token->get_id ())); - } - - // FIXME: gcc_unreachable() error case? 
- return {std::vector<std::unique_ptr<AST::Token>> (), 0}; -} - -std::vector<std::unique_ptr<AST::Token>> -MacroExpander::substitute_tokens ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, std::vector<MatchedFragment>> &fragments) -{ - std::vector<std::unique_ptr<AST::Token>> replaced_tokens; - - for (size_t i = 0; i < macro.size (); i++) - { - auto &tok = macro.at (i); - if (tok->get_id () == DOLLAR_SIGN) - { - // Aaaaah, if only we had C++17 :) - // auto [expanded, tok_to_skip] = ... - auto p = substitute_token (input, macro, fragments, i + 1); - auto expanded = std::move (p.first); - auto tok_to_skip = p.second; - - i += tok_to_skip; - - for (auto &token : expanded) - replaced_tokens.emplace_back (token->clone_token ()); - } - else - { - replaced_tokens.emplace_back (tok->clone_token ()); - } - } - - return replaced_tokens; -} - } // namespace Rust diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h index eeafdb8..9309323 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -213,64 +213,6 @@ struct MacroExpander size_t &match_amount, size_t lo_bound = 0, size_t hi_bound = 0); - /** - * Substitute a metavariable by its given fragment in a transcribing context, - * i.e. replacing $var with the associated fragment. 
- * - * @param input Tokens given to the transcribing context - * @param fragments Fragments given to the macro substitution - * @param metavar Metavariable to try and replace - * - * @return A token containing the associated fragment expanded into tokens if - * any, or the cloned token if no fragment was associated - */ - static std::vector<std::unique_ptr<AST::Token>> substitute_metavar ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::map<std::string, std::vector<MatchedFragment>> &fragments, - std::unique_ptr<AST::Token> &metavar); - - /** - * Substitute a macro repetition by its given fragments - * - * @param input Tokens given to the transcribing context - * @param fragments Fragments given to the macro substitution - * @param pattern_start Start index of the pattern tokens - * @param pattern_end Index Amount of tokens in the pattern - * - * @return A vector containing the repeated pattern - */ - static std::vector<std::unique_ptr<AST::Token>> substitute_repetition ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, std::vector<MatchedFragment>> &fragments, - size_t pattern_start, size_t pattern_end); - - /** - * Substitute a given token by its appropriate representation - * - * @param macro Tokens used in the macro declaration - * @param input Tokens given to the transcribing context - * @param fragments Fragments given to the macro substitution - * @param token Current token to try and substitute - * - * @return A token containing the associated fragment expanded into tokens if - * any, or the cloned token if no fragment was associated, as well as the - * amount of tokens that should be skipped before the next invocation. 
Since - * this function may consume more than just one token, it is important to skip - * ahead of the input to avoid mis-substitutions - */ - static std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t> - substitute_token ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, std::vector<MatchedFragment>> &fragments, - size_t token_idx); - - static std::vector<std::unique_ptr<AST::Token>> substitute_tokens ( - std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, std::vector<MatchedFragment>> &fragments); - void push_context (ContextType t) { context.push_back (t); } ContextType pop_context () diff --git a/gcc/rust/expand/rust-macro-substitute-ctx.cc b/gcc/rust/expand/rust-macro-substitute-ctx.cc new file mode 100644 index 0000000..8542614 --- /dev/null +++ b/gcc/rust/expand/rust-macro-substitute-ctx.cc @@ -0,0 +1,206 @@ +#include "rust-macro-substitute-ctx.h" + +namespace Rust { + +std::vector<std::unique_ptr<AST::Token>> +SubstituteCtx::substitute_metavar (std::unique_ptr<AST::Token> &metavar) +{ + auto metavar_name = metavar->get_str (); + + std::vector<std::unique_ptr<AST::Token>> expanded; + auto it = fragments.find (metavar_name); + if (it == fragments.end ()) + { + // Return a copy of the original token + expanded.push_back (metavar->clone_token ()); + } + else + { + // Replace + // We only care about the vector when expanding repetitions. Just access + // the first element of the vector. 
+ // FIXME: Clean this up so it makes more sense + auto &frag = it->second[0]; + for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end; + offs++) + { + auto &tok = input.at (offs); + expanded.push_back (tok->clone_token ()); + } + } + + return expanded; +} + +std::vector<std::unique_ptr<AST::Token>> +SubstituteCtx::substitute_repetition (size_t pattern_start, size_t pattern_end) +{ + rust_assert (pattern_end < macro.size ()); + + rust_debug ("pattern start: %lu", pattern_start); + rust_debug ("pattern end: %lu", pattern_end); + + std::vector<std::unique_ptr<AST::Token>> expanded; + + // Find the first fragment and get the amount of repetitions that we should + // perform + size_t repeat_amount = 0; + for (size_t i = pattern_start; i < pattern_end; i++) + { + if (macro.at (i)->get_id () == DOLLAR_SIGN) + { + auto &frag_token = macro.at (i + 1); + if (frag_token->get_id () == IDENTIFIER) + { + auto it = fragments.find (frag_token->get_str ()); + if (it == fragments.end ()) + { + // If the repetition is not anything we know (ie no declared + // metavars, or metavars which aren't present in the + // fragment), we can just error out. No need to paste the + // tokens as if nothing had happened. + rust_error_at (frag_token->get_locus (), + "metavar %s used in repetition does not exist", + frag_token->get_str ().c_str ()); + // FIXME: + return expanded; + } + + // FIXME: Refactor, ugly + repeat_amount = it->second[0].match_amount; + } + } + } + + rust_debug ("repetition amount to use: %lu", repeat_amount); + std::vector<std::unique_ptr<AST::Token>> new_macro; + + // We want to generate a "new macro" to substitute with. This new macro + // should contain only the tokens inside the pattern + for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++) + new_macro.emplace_back (macro.at (tok_idx)->clone_token ()); + + // Then, we want to create a subset of the matches so that + // `substitute_tokens()` can only see one fragment per metavar. 
Let's say we + // have the following user input: (1 145 'h') + // on the following match arm: ($($lit:literal)*) + // which causes the following matches: { "lit": [1, 145, 'h'] } + // + // The pattern (new_macro) is `$lit:literal` + // The first time we expand it, we want $lit to have the following token: 1 + // The second time, 145 + // The third and final time, 'h' + // + // In order to do so we must create "sub maps", which only contain parts of + // the original matches + // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ] + // + // and give them to `substitute_tokens` one by one. + + for (size_t i = 0; i < repeat_amount; i++) + { + std::map<std::string, std::vector<MatchedFragment>> sub_map; + for (auto &kv_match : fragments) + { + std::vector<MatchedFragment> sub_vec; + sub_vec.emplace_back (kv_match.second[i]); + + sub_map.insert ({kv_match.first, sub_vec}); + } + + auto substitute_context = SubstituteCtx (input, new_macro, sub_map); + auto new_tokens = substitute_context.substitute_tokens (); + + for (auto &new_token : new_tokens) + expanded.emplace_back (new_token->clone_token ()); + } + + // FIXME: We also need to make sure that all subsequent fragments + // contain the same amount of repetitions as the first one + + return expanded; +} + +std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t> +SubstituteCtx::substitute_token (size_t token_idx) +{ + auto &token = macro.at (token_idx); + switch (token->get_id ()) + { + case IDENTIFIER: + rust_debug ("expanding metavar: %s", token->get_str ().c_str ()); + return {substitute_metavar (token), 1}; + case LEFT_PAREN: { + // We need to parse up until the closing delimiter and expand this + // fragment->n times. 
+ rust_debug ("expanding repetition"); + std::vector<std::unique_ptr<AST::Token>> repetition_pattern; + size_t pattern_start = token_idx + 1; + size_t pattern_end = pattern_start; + for (; pattern_end < macro.size () + && macro.at (pattern_end)->get_id () != RIGHT_PAREN; + pattern_end++) + ; + + // FIXME: This skips whitespaces... Is that okay?? + // FIXME: Is there any existing parsing function that allows us to parse + // a macro pattern? + + // FIXME: Add error handling in the case we haven't found a matching + // closing delimiter + + // FIXME: We need to parse the repetition token now + + return { + substitute_repetition (pattern_start, pattern_end), + // + 2 for the opening and closing parentheses which are mandatory + // + 1 for the repetitor (+, *, ?) + pattern_end - pattern_start + 3}; + } + // TODO: We need to check if the $ was alone. In that case, do + // not error out: Simply act as if there was an empty identifier + // with no associated fragment and paste the dollar sign in the + // transcription. Unsure how to do that since we always have at + // least the closing curly brace after an empty $... + default: + rust_error_at (token->get_locus (), + "unexpected token in macro transcribe: expected " + "%<(%> or identifier after %<$%>, got %<%s%>", + get_token_description (token->get_id ())); + } + + // FIXME: gcc_unreachable() error case? + return {std::vector<std::unique_ptr<AST::Token>> (), 0}; +} + +std::vector<std::unique_ptr<AST::Token>> +SubstituteCtx::substitute_tokens () +{ + std::vector<std::unique_ptr<AST::Token>> replaced_tokens; + + for (size_t i = 0; i < macro.size (); i++) + { + auto &tok = macro.at (i); + if (tok->get_id () == DOLLAR_SIGN) + { + // Aaaaah, if only we had C++17 :) + // auto [expanded, tok_to_skip] = ... 
+ auto p = substitute_token (i + 1); + auto expanded = std::move (p.first); + auto tok_to_skip = p.second; + + i += tok_to_skip; + + for (auto &token : expanded) + replaced_tokens.emplace_back (token->clone_token ()); + } + else + { + replaced_tokens.emplace_back (tok->clone_token ()); + } + } + + return replaced_tokens; +} + +} // namespace Rust diff --git a/gcc/rust/expand/rust-macro-substitute-ctx.h b/gcc/rust/expand/rust-macro-substitute-ctx.h new file mode 100644 index 0000000..d51fb81 --- /dev/null +++ b/gcc/rust/expand/rust-macro-substitute-ctx.h @@ -0,0 +1,80 @@ +// Copyright (C) 2020-2022 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +#include "rust-ast.h" +#include "rust-macro-expand.h" + +namespace Rust { +class SubstituteCtx +{ + std::vector<std::unique_ptr<AST::Token>> &input; + std::vector<std::unique_ptr<AST::Token>> &macro; + std::map<std::string, std::vector<MatchedFragment>> &fragments; + +public: + SubstituteCtx (std::vector<std::unique_ptr<AST::Token>> &input, + std::vector<std::unique_ptr<AST::Token>> &macro, + std::map<std::string, std::vector<MatchedFragment>> &fragments) + : input (input), macro (macro), fragments (fragments) + {} + + /** + * Substitute a metavariable by its given fragment in a transcribing context, + * i.e. replacing $var with the associated fragment. 
+ * + * @param metavar Metavariable to try and replace + * + * @return A token containing the associated fragment expanded into tokens if + * any, or the cloned token if no fragment was associated + */ + std::vector<std::unique_ptr<AST::Token>> + substitute_metavar (std::unique_ptr<AST::Token> &metavar); + + /** + * Substitute a macro repetition by its given fragments + * + * @param pattern_start Start index of the pattern tokens + * @param pattern_end Index Amount of tokens in the pattern + * + * @return A vector containing the repeated pattern + */ + std::vector<std::unique_ptr<AST::Token>> + substitute_repetition (size_t pattern_start, size_t pattern_end); + + /** + * Substitute a given token by its appropriate representation + * + * @param token_idx Current token to try and substitute + * + * @return A token containing the associated fragment expanded into tokens if + * any, or the cloned token if no fragment was associated, as well as the + * amount of tokens that should be skipped before the next invocation. Since + * this function may consume more than just one token, it is important to skip + * ahead of the input to avoid mis-substitutions + */ + std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t> + substitute_token (size_t token_idx); + + /** + * Substitute all tokens by their appropriate representation + * + * @return A vector containing the substituted tokens + */ + std::vector<std::unique_ptr<AST::Token>> substitute_tokens (); +}; +} // namespace Rust |