From ae1f91a698022a5600a2d54e48fc90895ea834fd Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Fri, 18 Feb 2022 18:22:08 +0100 Subject: transcribe: Move substitute_metavar in its own function --- gcc/rust/ast/rust-ast.h | 1 + gcc/rust/expand/rust-macro-expand.cc | 178 ++++++++++++++++++++++++++--------- gcc/rust/expand/rust-macro-expand.h | 34 +++++++ 3 files changed, 166 insertions(+), 47 deletions(-) (limited to 'gcc') diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index e72937e..dfd0b3e 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -204,6 +204,7 @@ public: std::vector > to_token_stream () const override; TokenId get_id () const { return tok_ref->get_id (); } + const std::string &get_str () const { return tok_ref->get_str (); } Location get_locus () const { return tok_ref->get_locus (); } diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index ff0f169..7dc8f88 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -3754,10 +3754,10 @@ MacroExpander::transcribe_rule ( = substitute_tokens (invoc_stream, macro_rule_tokens, matched_fragments); // // handy for debugging - // for (auto &tok : substituted_tokens) - // { - // rust_debug ("tok: [%s]", tok->as_string ().c_str ()); - // } + for (auto &tok : substituted_tokens) + { + rust_debug ("tok: [%s]", tok->as_string ().c_str ()); + } // parse it to an ASTFragment MacroInvocLexer lex (std::move (substituted_tokens)); @@ -3875,6 +3875,67 @@ MacroExpander::transcribe_rule ( } std::vector> +MacroExpander::substitute_metavar ( + std::vector> &input, + std::map &fragments, + std::unique_ptr &metavar) +{ + auto metavar_name = metavar->get_str (); + + rust_debug ("expanding metavar: %s", metavar_name.c_str ()); + std::vector> expanded; + auto it = fragments.find (metavar_name); + if (it == fragments.end ()) + { + // Return a copy of the original token + expanded.push_back (metavar->clone_token ()); + } + 
else + { + // Replace + MatchedFragment &frag = it->second; + for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end; + offs++) + { + auto &tok = input.at (offs); + expanded.push_back (tok->clone_token ()); + } + } + + return expanded; +} + +std::pair>, size_t> +MacroExpander::substitute_token ( + std::vector> &input, + std::map &fragments, + std::unique_ptr &token) +{ + switch (token->get_id ()) + { + case IDENTIFIER: + rust_debug ("expanding metavar"); + return {substitute_metavar (input, fragments, token), 1}; + case LEFT_PAREN: + rust_debug ("expanding repetition"); + break; + // TODO: We need to check if the $ was alone. In that case, do + // not error out: Simply act as if there was an empty identifier + // with no associated fragment and paste the dollar sign in the + // transcription. Unsure how to do that since we always have at + // least the closing curly brace after an empty $... + default: + rust_error_at (token->get_locus (), + "unexpected token in macro transcribe: expected " + "%<(%> or identifier after %<$%>, got %<%s%>", + get_token_description (token->get_id ())); + } + + // FIXME: gcc_unreachable() error case? + return {std::vector> (), 0}; +} + +std::vector> MacroExpander::substitute_tokens ( std::vector> &input, std::vector> ¯o, @@ -3882,60 +3943,83 @@ MacroExpander::substitute_tokens ( { std::vector> replaced_tokens; + // for token in macro + // if token == ?: + // // That's not always true: If it's a left paren, it's repetition + // // We probably want to store the matched amount in the fragment so + // // we can expand it here + // id = next_token(); + // frag = fragment.find(id); + for (size_t i = 0; i < macro.size (); i++) { auto &tok = macro.at (i); if (tok->get_id () == DOLLAR_SIGN) { - std::vector> parsed_toks; + auto &next_tok = macro.at (i + 1); + // Aaaaah, if only we had C++17 :) + // auto [expanded, tok_to_skip] = ... 
+ auto p = substitute_token (input, fragments, next_tok); + auto expanded = std::move (p.first); + auto tok_to_skip = p.second; - std::string ident; - for (size_t offs = i; i < macro.size (); offs++) - { - auto &tok = macro.at (offs); - if (tok->get_id () == DOLLAR_SIGN && offs == i) - { - parsed_toks.push_back (tok->clone_token ()); - } - else if (tok->get_id () == IDENTIFIER) - { - rust_assert (tok->as_string ().size () == 1); - ident.push_back (tok->as_string ().at (0)); - parsed_toks.push_back (tok->clone_token ()); - } - else - { - break; - } - } + i += tok_to_skip; - // lookup the ident - auto it = fragments.find (ident); - if (it == fragments.end ()) - { - // just leave the tokens in - for (auto &tok : parsed_toks) - { - replaced_tokens.push_back (tok->clone_token ()); - } - } - else - { - // replace - MatchedFragment &frag = it->second; - for (size_t offs = frag.token_offset_begin; - offs < frag.token_offset_end; offs++) - { - auto &tok = input.at (offs); - replaced_tokens.push_back (tok->clone_token ()); - } - } - i += parsed_toks.size () - 1; + for (auto &token : expanded) + replaced_tokens.emplace_back (token->clone_token ()); } else { - replaced_tokens.push_back (tok->clone_token ()); + replaced_tokens.emplace_back (tok->clone_token ()); } + + // std::vector> parsed_toks; + + // std::string ident; + // for (size_t offs = i; i < macro.size (); offs++) + // { + // auto &tok = macro.at (offs); + // if (tok->get_id () == DOLLAR_SIGN && offs == i) + // { + // parsed_toks.push_back (tok->clone_token ()); + // } + // else if (tok->get_id () == IDENTIFIER) + // { + // rust_assert (tok->as_string ().size () == 1); + // ident.push_back (tok->as_string ().at (0)); + // parsed_toks.push_back (tok->clone_token ()); + // } + // else + // { + // break; + // } + // } + + // // lookup the ident + // auto it = fragments.find (ident); + // if (it == fragments.end ()) + // { + // // just leave the tokens in + // for (auto &tok : parsed_toks) + // { + // 
replaced_tokens.push_back (tok->clone_token ()); + // } + // } + // else + // { + // // replace + // MatchedFragment &frag = it->second; + // for (size_t offs = frag.token_offset_begin; + // offs < frag.token_offset_end; offs++) + // { + // auto &tok = input.at (offs); + // replaced_tokens.push_back (tok->clone_token ()); + // } + // } + // i += parsed_toks.size () - 1; + // + // } + // else { replaced_tokens.push_back (tok->clone_token ()); } } return replaced_tokens; diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h index edb091d..f77acc7 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -183,6 +183,40 @@ struct MacroExpander size_t &match_amount, size_t lo_bound = 0, size_t hi_bound = 0); + /** + * Substitute a metavariable by its given fragment in a transcribing context, + * i.e. replacing $var with the associated fragment. + * + * @param input Tokens given to the transcribing context + * @param fragments Fragments given to the macro substitution + * @param metavar Metavariable to try and replace + * + * @return A token containing the associated fragment expanded into tokens if + * any, or the cloned token if no fragment was associated + */ + static std::vector> + substitute_metavar (std::vector> &input, + std::map &fragments, + std::unique_ptr &metavar); + + /** + * Substitute a given token by its appropriate representation + * + * @param input Tokens given to the transcribing context + * @param fragments Fragments given to the macro substitution + * @param token Current token to try and substitute + * + * @return A token containing the associated fragment expanded into tokens if + * any, or the cloned token if no fragment was associated, as well as the + * amount of tokens that should be skipped before the next invocation. 
Since + * this function may consume more than just one token, it is important to skip + * ahead of the input to avoid mis-substitutions + */ + static std::pair>, size_t> + substitute_token (std::vector> &input, + std::map &fragments, + std::unique_ptr &token); + static std::vector> substitute_tokens (std::vector> &input, std::vector> ¯o, -- cgit v1.1 From 143aad62e16ea96e8c562b96857c2497f74ba7c7 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Mon, 21 Feb 2022 10:48:00 +0100 Subject: substitute_repetition: Add parsing of repetition pattern --- gcc/rust/expand/rust-macro-expand.cc | 51 +++++++++++++++++++++++++++++++----- gcc/rust/expand/rust-macro-expand.h | 20 ++++++++++++-- 2 files changed, 62 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index 7dc8f88..7aa4289 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -3905,20 +3905,58 @@ MacroExpander::substitute_metavar ( return expanded; } +std::vector> +MacroExpander::substitute_repetition ( + std::vector> &input, + std::map &fragments, + std::vector> &pattern) +{ + // If the repetition is not anything we know (ie no declared metavars, or + // metavars which aren't present in the fragment), we can just error out. No + // need to paste the tokens as if nothing had happened. 
+ for (auto &token : pattern) + rust_debug ("[repetition pattern]: %s", token->as_string ().c_str ()); + + return std::vector> (); +} + std::pair>, size_t> MacroExpander::substitute_token ( + std::vector> ¯o, std::vector> &input, - std::map &fragments, - std::unique_ptr &token) + std::map &fragments, size_t token_idx) { + auto &token = macro.at (token_idx); switch (token->get_id ()) { case IDENTIFIER: rust_debug ("expanding metavar"); return {substitute_metavar (input, fragments, token), 1}; - case LEFT_PAREN: - rust_debug ("expanding repetition"); - break; + case LEFT_PAREN: { + // We need to parse up until the closing delimiter and expand this + // fragment->n times. + rust_debug ("expanding repetition"); + std::vector> repetition_pattern; + for (size_t rep_idx = token_idx + 1; + rep_idx < macro.size () + && macro.at (rep_idx)->get_id () != RIGHT_PAREN; + rep_idx++) + repetition_pattern.emplace_back (macro.at (rep_idx)->clone_token ()); + + // FIXME: This skips whitespaces... Is that okay?? + // FIXME: Is there any existing parsing function that allows us to parse + // a macro pattern? + + // FIXME: Add error handling in the case we haven't found a matching + // closing delimiter + + // FIXME: We need to parse the repetition token now + + return { + substitute_repetition (input, fragments, repetition_pattern), + // + 2 for the opening and closing parenthesis which are mandatory + repetition_pattern.size () + 2}; + } // TODO: We need to check if the $ was alone. In that case, do // not error out: Simply act as if there was an empty identifier // with no associated fragment and paste the dollar sign in the @@ -3956,10 +3994,9 @@ MacroExpander::substitute_tokens ( auto &tok = macro.at (i); if (tok->get_id () == DOLLAR_SIGN) { - auto &next_tok = macro.at (i + 1); // Aaaaah, if only we had C++17 :) // auto [expanded, tok_to_skip] = ... 
- auto p = substitute_token (input, fragments, next_tok); + auto p = substitute_token (macro, input, fragments, i + 1); auto expanded = std::move (p.first); auto tok_to_skip = p.second; diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h index f77acc7..943115d 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -200,8 +200,23 @@ struct MacroExpander std::unique_ptr &metavar); /** + * Substitute a macro repetition by its given fragments + * + * @param input Tokens given to the transcribing context + * @param fragments Fragments given to the macro substitution + * @param repetition Set of tokens to substitute and replace + * + * @return A vector containing the repeated pattern + */ + static std::vector> + substitute_repetition (std::vector> &input, + std::map &fragments, + std::vector> &pattern); + + /** * Substitute a given token by its appropriate representation * + * @param macro Tokens used in the macro declaration * @param input Tokens given to the transcribing context * @param fragments Fragments given to the macro substitution * @param token Current token to try and substitute @@ -213,9 +228,10 @@ struct MacroExpander * ahead of the input to avoid mis-substitutions */ static std::pair>, size_t> - substitute_token (std::vector> &input, + substitute_token (std::vector> ¯o, + std::vector> &input, std::map &fragments, - std::unique_ptr &token); + size_t token_idx); static std::vector> substitute_tokens (std::vector> &input, -- cgit v1.1 From ded1aca9022faa4a72a97f117ace085ecb34487f Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Mon, 21 Feb 2022 13:49:40 +0100 Subject: substitute_repetition: Substitute repetitions properly --- gcc/rust/expand/rust-macro-expand.cc | 94 ++++++++++++++++++++++++++++-------- gcc/rust/expand/rust-macro-expand.h | 10 ++-- 2 files changed, 81 insertions(+), 23 deletions(-) (limited to 'gcc') diff --git a/gcc/rust/expand/rust-macro-expand.cc 
b/gcc/rust/expand/rust-macro-expand.cc index 7aa4289..668b89f 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -3882,7 +3882,6 @@ MacroExpander::substitute_metavar ( { auto metavar_name = metavar->get_str (); - rust_debug ("expanding metavar: %s", metavar_name.c_str ()); std::vector> expanded; auto it = fragments.find (metavar_name); if (it == fragments.end ()) @@ -3908,40 +3907,95 @@ MacroExpander::substitute_metavar ( std::vector> MacroExpander::substitute_repetition ( std::vector> &input, - std::map &fragments, - std::vector> &pattern) + std::vector> ¯o, + std::map &fragments, size_t pattern_start, + size_t pattern_end) { - // If the repetition is not anything we know (ie no declared metavars, or - // metavars which aren't present in the fragment), we can just error out. No - // need to paste the tokens as if nothing had happened. - for (auto &token : pattern) - rust_debug ("[repetition pattern]: %s", token->as_string ().c_str ()); + rust_assert (pattern_end < macro.size ()); + + rust_debug ("pattern start: %lu", pattern_start); + rust_debug ("pattern end: %lu", pattern_end); + + std::vector> expanded; + + for (size_t i = pattern_start; i < pattern_end; i++) + rust_debug ("[repetition pattern]: %s", + macro.at (i)->as_string ().c_str ()); + + // Find the first fragment and get the amount of repetitions that we should + // perform + size_t repeat_amount = 0; + for (size_t i = pattern_start; i < pattern_end; i++) + { + if (macro.at (i)->get_id () == DOLLAR_SIGN) + { + auto &frag_token = macro.at (i + 1); + if (frag_token->get_id () == IDENTIFIER) + { + auto it = fragments.find (frag_token->get_str ()); + if (it == fragments.end ()) + { + // If the repetition is not anything we know (ie no declared + // metavars, or metavars which aren't present in the + // fragment), we can just error out. No need to paste the + // tokens as if nothing had happened. 
+ rust_error_at (frag_token->get_locus (), + "metavar used in repetition does not exist"); + return expanded; + } - return std::vector> (); + repeat_amount = it->second.match_amount; + } + } + } + + std::vector> new_macro; + for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++) + { + new_macro.emplace_back (macro.at (tok_idx)->clone_token ()); + rust_debug ("new macro token: %s", + macro.at (tok_idx)->as_string ().c_str ()); + } + + // FIXME: We have to be careful and not push the repetition token + auto new_tokens = substitute_tokens (input, new_macro, fragments); + + rust_debug ("repetition amount to use: %lu", repeat_amount); + for (size_t i = 0; i < repeat_amount; i++) + { + for (auto &new_token : new_tokens) + expanded.emplace_back (new_token->clone_token ()); + } + + // FIXME: We also need to make sure that all subsequent fragments + // contain the same amount of repetitions as the first one + + return expanded; } std::pair>, size_t> MacroExpander::substitute_token ( - std::vector> ¯o, std::vector> &input, + std::vector> ¯o, std::map &fragments, size_t token_idx) { auto &token = macro.at (token_idx); switch (token->get_id ()) { case IDENTIFIER: - rust_debug ("expanding metavar"); + rust_debug ("expanding metavar: %s", token->get_str ().c_str ()); return {substitute_metavar (input, fragments, token), 1}; case LEFT_PAREN: { // We need to parse up until the closing delimiter and expand this // fragment->n times. rust_debug ("expanding repetition"); std::vector> repetition_pattern; - for (size_t rep_idx = token_idx + 1; - rep_idx < macro.size () - && macro.at (rep_idx)->get_id () != RIGHT_PAREN; - rep_idx++) - repetition_pattern.emplace_back (macro.at (rep_idx)->clone_token ()); + size_t pattern_start = token_idx + 1; + size_t pattern_end = pattern_start; + for (; pattern_end < macro.size () + && macro.at (pattern_end)->get_id () != RIGHT_PAREN; + pattern_end++) + ; // FIXME: This skips whitespaces... Is that okay?? 
// FIXME: Is there any existing parsing function that allows us to parse @@ -3953,9 +4007,11 @@ MacroExpander::substitute_token ( // FIXME: We need to parse the repetition token now return { - substitute_repetition (input, fragments, repetition_pattern), + substitute_repetition (input, macro, fragments, pattern_start, + pattern_end), // + 2 for the opening and closing parenthesis which are mandatory - repetition_pattern.size () + 2}; + // + 1 for the repetitor (+, *, ?) + pattern_end - pattern_start + 3}; } // TODO: We need to check if the $ was alone. In that case, do // not error out: Simply act as if there was an empty identifier @@ -3996,7 +4052,7 @@ MacroExpander::substitute_tokens ( { // Aaaaah, if only we had C++17 :) // auto [expanded, tok_to_skip] = ... - auto p = substitute_token (macro, input, fragments, i + 1); + auto p = substitute_token (input, macro, fragments, i + 1); auto expanded = std::move (p.first); auto tok_to_skip = p.second; diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h index 943115d..51e5ad1 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -204,14 +204,16 @@ struct MacroExpander * * @param input Tokens given to the transcribing context * @param fragments Fragments given to the macro substitution - * @param repetition Set of tokens to substitute and replace + * @param pattern_start Start index of the pattern tokens + * @param pattern_end Index Amount of tokens in the pattern * * @return A vector containing the repeated pattern */ static std::vector> substitute_repetition (std::vector> &input, + std::vector> ¯o, std::map &fragments, - std::vector> &pattern); + size_t pattern_start, size_t pattern_end); /** * Substitute a given token by its appropriate representation @@ -228,8 +230,8 @@ struct MacroExpander * ahead of the input to avoid mis-substitutions */ static std::pair>, size_t> - substitute_token (std::vector> ¯o, - std::vector> &input, + 
substitute_token (std::vector> &input, + std::vector> ¯o, std::map &fragments, size_t token_idx); -- cgit v1.1 From 6db51e39363a4500dd8d3e04d87f682f9ef11f26 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Mon, 21 Feb 2022 15:03:18 +0100 Subject: subs_repetition: Add simple test cases --- gcc/testsuite/rust/execute/torture/macros10.rs | 20 ++++++++++++++++++++ gcc/testsuite/rust/execute/torture/macros11.rs | 22 ++++++++++++++++++++++ gcc/testsuite/rust/execute/torture/macros12.rs | 20 ++++++++++++++++++++ gcc/testsuite/rust/execute/torture/macros13.rs | 20 ++++++++++++++++++++ 4 files changed, 82 insertions(+) create mode 100644 gcc/testsuite/rust/execute/torture/macros10.rs create mode 100644 gcc/testsuite/rust/execute/torture/macros11.rs create mode 100644 gcc/testsuite/rust/execute/torture/macros12.rs create mode 100644 gcc/testsuite/rust/execute/torture/macros13.rs (limited to 'gcc') diff --git a/gcc/testsuite/rust/execute/torture/macros10.rs b/gcc/testsuite/rust/execute/torture/macros10.rs new file mode 100644 index 0000000..f1fc34e --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros10.rs @@ -0,0 +1,20 @@ +// { dg-output "18\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)*) => (0 $(+ $e)*) +} + +fn main() -> i32 { + // 1 + 2 + 15 => 18 + print_int(add_exprs!(1 2 15)); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros11.rs b/gcc/testsuite/rust/execute/torture/macros11.rs new file mode 100644 index 0000000..7ce7d80 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros11.rs @@ -0,0 +1,22 @@ +// { dg-output "2" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0"; + let s_p = s as *const str; + let c_p = s_p as *const i8; + unsafe { printf(c_p, value); } +} + +macro_rules! add_exprs { + ($($e:expr)?) => (0 $(+ $e)?) 
+} + +fn main() -> i32 { + // 2 + print_int(add_exprs!(2)); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros12.rs b/gcc/testsuite/rust/execute/torture/macros12.rs new file mode 100644 index 0000000..ff4a862 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros12.rs @@ -0,0 +1,20 @@ +// { dg-output "0\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)?) => (0 $(+ $e)?) +} + +fn main() -> i32 { + // 0 + print_int(add_exprs!()); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros13.rs b/gcc/testsuite/rust/execute/torture/macros13.rs new file mode 100644 index 0000000..af5dfe8 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros13.rs @@ -0,0 +1,20 @@ +// { dg-output "18\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! 
add_exprs { + ($($e:expr)+) => (0 $(+ $e)+) +} + +fn main() -> i32 { + // 1 + 2 + 15 => 18 + print_int(add_exprs!(1 2 15)); + + 0 +} -- cgit v1.1 From 1f546e5e3addda01c5c449833642be8fcdf5dcdc Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Tue, 22 Feb 2022 09:32:52 +0100 Subject: match_repetition: Set the correct amount of matches for each fragment Co-authored-by: philberty --- gcc/rust/expand/rust-macro-expand.cc | 101 +++++++------------------ gcc/rust/expand/rust-macro-expand.h | 12 ++- gcc/testsuite/rust/execute/torture/macros14.rs | 20 +++++ 3 files changed, 59 insertions(+), 74 deletions(-) create mode 100644 gcc/testsuite/rust/execute/torture/macros14.rs (limited to 'gcc') diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index 668b89f..45d2b74 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -3611,7 +3611,6 @@ MacroExpander::match_n_matches ( match_amount = 0; const MacroInvocLexer &source = parser.get_token_source (); - std::vector fragment_identifiers; while (true) { // If the current token is a closing macro delimiter, break away. @@ -3637,8 +3636,6 @@ MacroExpander::match_n_matches ( {fragment->get_ident (), MatchedFragment (fragment->get_ident (), offs_begin, offs_end)}); - - fragment_identifiers.emplace_back (fragment->get_ident ()); } break; @@ -3677,21 +3674,10 @@ MacroExpander::match_n_matches ( // Check if the amount of matches we got is valid: Is it more than the lower // bound and less than the higher bound? - auto result = hi_bound ? match_amount >= lo_bound && match_amount <= hi_bound - : match_amount >= lo_bound; - - // We can now set the amount to each fragment we matched in the substack - auto &stack_map = sub_stack.peek (); - for (auto &fragment_id : fragment_identifiers) - { - auto it = stack_map.find (fragment_id); - - rust_assert (it != stack_map.end ()); + bool did_meet_lo_bound = match_amount >= lo_bound; + bool did_meet_hi_bound = hi_bound ? 
match_amount <= hi_bound : true; - it->second.set_match_amount (match_amount); - } - - return result; + return did_meet_lo_bound && did_meet_hi_bound; } bool @@ -3733,6 +3719,31 @@ MacroExpander::match_repetition (Parser &parser, rust_debug_loc (rep.get_match_locus (), "%s matched %lu times", res ? "successfully" : "unsuccessfully", match_amount); + // We can now set the amount to each fragment we matched in the substack + auto &stack_map = sub_stack.peek (); + for (auto &match : rep.get_matches ()) + { + if (match->get_macro_match_type () + == AST::MacroMatch::MacroMatchType::Fragment) + { + auto fragment = static_cast (match.get ()); + auto it = stack_map.find (fragment->get_ident ()); + + // If we can't find the fragment, but the result was valid, then it's + // a zero-matched fragment and we can insert it + if (it == stack_map.end ()) + { + stack_map.insert ( + {fragment->get_ident (), + MatchedFragment::zero (fragment->get_ident ())}); + } + else + { + it->second.set_match_amount (match_amount); + } + } + } + return res; } @@ -4037,14 +4048,6 @@ MacroExpander::substitute_tokens ( { std::vector> replaced_tokens; - // for token in macro - // if token == ?: - // // That's not always true: If it's a left paren, it's repetition - // // We probably want to store the matched amount in the fragment so - // // we can expand it here - // id = next_token(); - // frag = fragment.find(id); - for (size_t i = 0; i < macro.size (); i++) { auto &tok = macro.at (i); @@ -4065,54 +4068,6 @@ MacroExpander::substitute_tokens ( { replaced_tokens.emplace_back (tok->clone_token ()); } - - // std::vector> parsed_toks; - - // std::string ident; - // for (size_t offs = i; i < macro.size (); offs++) - // { - // auto &tok = macro.at (offs); - // if (tok->get_id () == DOLLAR_SIGN && offs == i) - // { - // parsed_toks.push_back (tok->clone_token ()); - // } - // else if (tok->get_id () == IDENTIFIER) - // { - // rust_assert (tok->as_string ().size () == 1); - // ident.push_back 
(tok->as_string ().at (0)); - // parsed_toks.push_back (tok->clone_token ()); - // } - // else - // { - // break; - // } - // } - - // // lookup the ident - // auto it = fragments.find (ident); - // if (it == fragments.end ()) - // { - // // just leave the tokens in - // for (auto &tok : parsed_toks) - // { - // replaced_tokens.push_back (tok->clone_token ()); - // } - // } - // else - // { - // // replace - // MatchedFragment &frag = it->second; - // for (size_t offs = frag.token_offset_begin; - // offs < frag.token_offset_end; offs++) - // { - // auto &tok = input.at (offs); - // replaced_tokens.push_back (tok->clone_token ()); - // } - // } - // i += parsed_toks.size () - 1; - // - // } - // else { replaced_tokens.push_back (tok->clone_token ()); } } return replaced_tokens; diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h index 51e5ad1..f3cb36d 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -57,11 +57,21 @@ struct MatchedFragment size_t match_amount; MatchedFragment (std::string identifier, size_t token_offset_begin, - size_t token_offset_end, size_t match_amount = 0) + size_t token_offset_end, size_t match_amount = 1) : fragment_ident (identifier), token_offset_begin (token_offset_begin), token_offset_end (token_offset_end), match_amount (match_amount) {} + /** + * Create a valid fragment matched zero times. This is useful for repetitions + * which allow the absence of a fragment, such as * and ? 
+ */ + static MatchedFragment zero (std::string identifier) + { + // We don't need offsets since there is "no match" + return MatchedFragment (identifier, 0, 0, 0); + } + std::string as_string () const { return fragment_ident + "=" + std::to_string (token_offset_begin) + ":" diff --git a/gcc/testsuite/rust/execute/torture/macros14.rs b/gcc/testsuite/rust/execute/torture/macros14.rs new file mode 100644 index 0000000..2dc95e3 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros14.rs @@ -0,0 +1,20 @@ +// { dg-output "15\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)*) => (15 $(+ $e)*) +} + +fn main() -> i32 { + // 15 + print_int(add_exprs!()); + + 0 +} -- cgit v1.1 From 92a62562c804b8cc400383bc4b0acb9e79e22a93 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Tue, 22 Feb 2022 10:36:41 +0100 Subject: substitute_repetition: Correctly insert sub-fragments --- gcc/rust/expand/rust-macro-expand.cc | 117 +++++++++++++++++++++-------------- gcc/rust/expand/rust-macro-expand.h | 73 ++++++++++++++-------- 2 files changed, 119 insertions(+), 71 deletions(-) (limited to 'gcc') diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index 45d2b74..b54aa01 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -3118,7 +3118,7 @@ MacroExpander::expand_decl_macro (Location invoc_locus, // find matching arm AST::MacroRule *matched_rule = nullptr; - std::map matched_fragments; + std::map> matched_fragments; for (auto &rule : rules_def.get_rules ()) { sub_stack.push (); @@ -3127,9 +3127,9 @@ MacroExpander::expand_decl_macro (Location invoc_locus, if (did_match_rule) { - for (auto &frag : matched_fragments) - rust_debug ("matched fragment: %s", - frag.second.as_string ().c_str ()); + for (auto &kv : matched_fragments) + rust_debug ("[fragment]: %s (%ld)", 
kv.first.c_str (), + kv.second.size ()); matched_rule = &rule; break; @@ -3535,9 +3535,8 @@ MacroExpander::match_matcher (Parser &parser, // matched fragment get the offset in the token stream size_t offs_end = source.get_offs (); - sub_stack.peek ().insert ( - {fragment->get_ident (), - MatchedFragment (fragment->get_ident (), offs_begin, offs_end)}); + sub_stack.insert_fragment ( + MatchedFragment (fragment->get_ident (), offs_begin, offs_end)); } break; @@ -3632,10 +3631,9 @@ MacroExpander::match_n_matches ( // matched fragment get the offset in the token stream size_t offs_end = source.get_offs (); - sub_stack.peek ().insert ( - {fragment->get_ident (), - MatchedFragment (fragment->get_ident (), offs_begin, - offs_end)}); + sub_stack.insert_fragment ( + MatchedFragment (fragment->get_ident (), offs_begin, + offs_end)); } break; @@ -3729,17 +3727,19 @@ MacroExpander::match_repetition (Parser &parser, auto fragment = static_cast (match.get ()); auto it = stack_map.find (fragment->get_ident ()); - // If we can't find the fragment, but the result was valid, then it's - // a zero-matched fragment and we can insert it + // If we can't find the fragment, but the result was valid, then + // it's a zero-matched fragment and we can insert it if (it == stack_map.end ()) { - stack_map.insert ( - {fragment->get_ident (), - MatchedFragment::zero (fragment->get_ident ())}); + sub_stack.insert_fragment ( + MatchedFragment::zero (fragment->get_ident ())); } else { - it->second.set_match_amount (match_amount); + // We can just set the repetition amount on the first match + // FIXME: Make this more ergonomic and similar to what we fetch + // in `substitute_repetition` + it->second[0].set_match_amount (match_amount); } } } @@ -3750,8 +3750,8 @@ MacroExpander::match_repetition (Parser &parser, AST::ASTFragment MacroExpander::transcribe_rule ( AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree, - std::map &matched_fragments, bool semicolon, - ContextType ctx) + 
std::map> &matched_fragments, + bool semicolon, ContextType ctx) { // we can manipulate the token tree to substitute the dollar identifiers so // that when we call parse its already substituted for us @@ -3765,10 +3765,10 @@ MacroExpander::transcribe_rule ( = substitute_tokens (invoc_stream, macro_rule_tokens, matched_fragments); // // handy for debugging - for (auto &tok : substituted_tokens) - { - rust_debug ("tok: [%s]", tok->as_string ().c_str ()); - } + // for (auto &tok : substituted_tokens) + // { + // rust_debug ("tok: [%s]", tok->as_string ().c_str ()); + // } // parse it to an ASTFragment MacroInvocLexer lex (std::move (substituted_tokens)); @@ -3888,7 +3888,7 @@ MacroExpander::transcribe_rule ( std::vector> MacroExpander::substitute_metavar ( std::vector> &input, - std::map &fragments, + std::map> &fragments, std::unique_ptr &metavar) { auto metavar_name = metavar->get_str (); @@ -3903,7 +3903,10 @@ MacroExpander::substitute_metavar ( else { // Replace - MatchedFragment &frag = it->second; + // We only care about the vector when expanding repetitions. Just access + // the first element of the vector. 
+ // FIXME: Clean this up so it makes more sense + auto &frag = it->second[0]; for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end; offs++) { @@ -3919,8 +3922,8 @@ std::vector> MacroExpander::substitute_repetition ( std::vector> &input, std::vector> &macro, - std::map &fragments, size_t pattern_start, - size_t pattern_end) + std::map> &fragments, + size_t pattern_start, size_t pattern_end) { rust_assert (pattern_end < macro.size ()); @@ -3929,10 +3932,6 @@ MacroExpander::substitute_repetition ( std::vector> expanded; - for (size_t i = pattern_start; i < pattern_end; i++) - rust_debug ("[repetition pattern]: %s", - macro.at (i)->as_string ().c_str ()); - // Find the first fragment and get the amount of repetitions that we should // perform size_t repeat_amount = 0; @@ -3951,29 +3950,56 @@ MacroExpander::substitute_repetition ( // fragment), we can just error out. No need to paste the // tokens as if nothing had happened. rust_error_at (frag_token->get_locus (), - "metavar used in repetition does not exist"); + "metavar %s used in repetition does not exist", + frag_token->get_str ().c_str ()); + // FIXME: return expanded; } - repeat_amount = it->second.match_amount; + // FIXME: Refactor, ugly + repeat_amount = it->second[0].match_amount; } } } + rust_debug ("repetition amount to use: %lu", repeat_amount); std::vector> new_macro; - for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++) - { - new_macro.emplace_back (macro.at (tok_idx)->clone_token ()); - rust_debug ("new macro token: %s", - macro.at (tok_idx)->as_string ().c_str ()); - } - // FIXME: We have to be careful and not push the repetition token - auto new_tokens = substitute_tokens (input, new_macro, fragments); + // We want to generate a "new macro" to substitute with.
This new macro + // should contain only the tokens inside the pattern + for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++) + new_macro.emplace_back (macro.at (tok_idx)->clone_token ()); + + // Then, we want to create a subset of the matches so that + // `substitute_tokens()` can only see one fragment per metavar. Let's say we + // have the following user input: (1 145 'h') + // on the following match arm: ($($lit:literal)*) + // which causes the following matches: { "lit": [1, 145, 'h'] } + // + // The pattern (new_macro) is `$lit:literal` + // The first time we expand it, we want $lit to have the following token: 1 + // The second time, 145 + // The third and final time, 'h' + // + // In order to do so we must create "sub maps", which only contain parts of + // the original matches + // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ] + // + // and give them to `substitute_tokens` one by one. - rust_debug ("repetition amount to use: %lu", repeat_amount); for (size_t i = 0; i < repeat_amount; i++) { + std::map> sub_map; + for (auto &kv_match : fragments) + { + std::vector sub_vec; + sub_vec.emplace_back (kv_match.second[i]); + + sub_map.insert ({kv_match.first, sub_vec}); + } + + auto new_tokens = substitute_tokens (input, new_macro, sub_map); + for (auto &new_token : new_tokens) expanded.emplace_back (new_token->clone_token ()); } @@ -3988,7 +4014,8 @@ std::pair>, size_t> MacroExpander::substitute_token ( std::vector> &input, std::vector> &macro, - std::map &fragments, size_t token_idx) + std::map> &fragments, + size_t token_idx) { auto &token = macro.at (token_idx); switch (token->get_id ()) @@ -4020,7 +4047,7 @@ MacroExpander::substitute_token ( return { substitute_repetition (input, macro, fragments, pattern_start, pattern_end), - // + 2 for the opening and closing parenthesis which are mandatory + // + 2 for the opening and closing parentheses which are mandatory // + 1 for the repetitor (+, *, ?)
pattern_end - pattern_start + 3}; } @@ -4044,7 +4071,7 @@ std::vector> MacroExpander::substitute_tokens ( std::vector> &input, std::vector> &macro, - std::map &fragments) + std::map> &fragments) { std::vector> replaced_tokens; diff --git a/gcc/rust/expand/rust-macro-expand.h b/gcc/rust/expand/rust-macro-expand.h index f3cb36d..eeafdb8 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -89,17 +89,38 @@ public: void push () { stack.push_back ({}); } - std::map pop () + std::map> pop () { auto top = stack.back (); stack.pop_back (); return top; } - std::map &peek () { return stack.back (); } + std::map> &peek () + { + return stack.back (); + } + + void insert_fragment (MatchedFragment fragment) + { + auto &current_map = stack.back (); + auto it = current_map.find (fragment.fragment_ident); + + if (it == current_map.end ()) + { + auto new_frags = std::vector (); + new_frags.emplace_back (fragment); + current_map.insert ({fragment.fragment_ident, new_frags}); + } + else + { + auto &frags = it->second; + frags.emplace_back (fragment); + } + } private: - std::vector> stack; + std::vector>> stack; }; // Object used to store shared data (between functions) for macro expansion.
@@ -151,11 +172,10 @@ struct MacroExpander bool try_match_rule (AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree); - AST::ASTFragment - transcribe_rule (AST::MacroRule &match_rule, - AST::DelimTokenTree &invoc_token_tree, - std::map &matched_fragments, - bool semicolon, ContextType ctx); + AST::ASTFragment transcribe_rule ( + AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree, + std::map> &matched_fragments, + bool semicolon, ContextType ctx); bool match_fragment (Parser &parser, AST::MacroMatchFragment &fragment); @@ -204,10 +224,10 @@ struct MacroExpander * @return A token containing the associated fragment expanded into tokens if * any, or the cloned token if no fragment was associated */ - static std::vector> - substitute_metavar (std::vector> &input, - std::map &fragments, - std::unique_ptr &metavar); + static std::vector> substitute_metavar ( + std::vector> &input, + std::map> &fragments, + std::unique_ptr &metavar); /** * Substitute a macro repetition by its given fragments @@ -219,11 +239,11 @@ struct MacroExpander * * @return A vector containing the repeated pattern */ - static std::vector> - substitute_repetition (std::vector> &input, - std::vector> &macro, - std::map &fragments, - size_t pattern_start, size_t pattern_end); + static std::vector> substitute_repetition ( + std::vector> &input, + std::vector> &macro, + std::map> &fragments, + size_t pattern_start, size_t pattern_end); /** * Substitute a given token by its appropriate representation @@ -240,15 +260,16 @@ struct MacroExpander * ahead of the input to avoid mis-substitutions */ static std::pair>, size_t> - substitute_token (std::vector> &input, - std::vector> &macro, - std::map &fragments, - size_t token_idx); - - static std::vector> - substitute_tokens (std::vector> &input, - std::vector> &macro, - std::map &fragments); + substitute_token ( + std::vector> &input, + std::vector> &macro, + std::map> &fragments, + size_t token_idx); + + static std::vector> substitute_tokens ( +
std::vector> &input, + std::vector> &macro, + std::map> &fragments); void push_context (ContextType t) { context.push_back (t); } -- cgit v1.1