diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2022-02-23 09:17:08 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-23 09:17:08 +0000 |
commit | 10de9cf4f3765526a1a82a4a7d14908b58c6538c (patch) | |
tree | 74a005cad16ea7ccd00f95720523c2f569de0fa1 /gcc | |
parent | e0f261f1aea2c9968a07442ca5d4eab813f86a58 (diff) | |
parent | 92a62562c804b8cc400383bc4b0acb9e79e22a93 (diff) | |
download | gcc-10de9cf4f3765526a1a82a4a7d14908b58c6538c.zip gcc-10de9cf4f3765526a1a82a4a7d14908b58c6538c.tar.gz gcc-10de9cf4f3765526a1a82a4a7d14908b58c6538c.tar.bz2 |
Merge #956
956: Substitute repetitions r=CohenArthur a=CohenArthur
Needs #955
This PR splits up the `substitute_tokens` function into multiple smaller functions. Still a draft until I can get repetitions working.
Closes #960
Closes #961
Co-authored-by: Arthur Cohen <arthur.cohen@embecosm.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/ast/rust-ast.h | 1 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-expand.cc | 307 | ||||
-rw-r--r-- | gcc/rust/expand/rust-macro-expand.h | 109 | ||||
-rw-r--r-- | gcc/testsuite/rust/execute/torture/macros10.rs | 20 | ||||
-rw-r--r-- | gcc/testsuite/rust/execute/torture/macros11.rs | 22 | ||||
-rw-r--r-- | gcc/testsuite/rust/execute/torture/macros12.rs | 20 | ||||
-rw-r--r-- | gcc/testsuite/rust/execute/torture/macros13.rs | 20 | ||||
-rw-r--r-- | gcc/testsuite/rust/execute/torture/macros14.rs | 20 |
8 files changed, 432 insertions, 87 deletions
diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index e72937e..dfd0b3e 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -204,6 +204,7 @@ public: std::vector<std::unique_ptr<Token> > to_token_stream () const override; TokenId get_id () const { return tok_ref->get_id (); } + const std::string &get_str () const { return tok_ref->get_str (); } Location get_locus () const { return tok_ref->get_locus (); } diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index ff0f169..b54aa01 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -3118,7 +3118,7 @@ MacroExpander::expand_decl_macro (Location invoc_locus, // find matching arm AST::MacroRule *matched_rule = nullptr; - std::map<std::string, MatchedFragment> matched_fragments; + std::map<std::string, std::vector<MatchedFragment>> matched_fragments; for (auto &rule : rules_def.get_rules ()) { sub_stack.push (); @@ -3127,9 +3127,9 @@ MacroExpander::expand_decl_macro (Location invoc_locus, if (did_match_rule) { - for (auto &frag : matched_fragments) - rust_debug ("matched fragment: %s", - frag.second.as_string ().c_str ()); + for (auto &kv : matched_fragments) + rust_debug ("[fragment]: %s (%ld)", kv.first.c_str (), + kv.second.size ()); matched_rule = &rule; break; @@ -3535,9 +3535,8 @@ MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser, // matched fragment get the offset in the token stream size_t offs_end = source.get_offs (); - sub_stack.peek ().insert ( - {fragment->get_ident (), - MatchedFragment (fragment->get_ident (), offs_begin, offs_end)}); + sub_stack.insert_fragment ( + MatchedFragment (fragment->get_ident (), offs_begin, offs_end)); } break; @@ -3611,7 +3610,6 @@ MacroExpander::match_n_matches ( match_amount = 0; const MacroInvocLexer &source = parser.get_token_source (); - std::vector<std::string> fragment_identifiers; while (true) { // If the current token is a closing macro delimiter, 
break away. @@ -3633,12 +3631,9 @@ MacroExpander::match_n_matches ( // matched fragment get the offset in the token stream size_t offs_end = source.get_offs (); - sub_stack.peek ().insert ( - {fragment->get_ident (), - MatchedFragment (fragment->get_ident (), offs_begin, - offs_end)}); - - fragment_identifiers.emplace_back (fragment->get_ident ()); + sub_stack.insert_fragment ( + MatchedFragment (fragment->get_ident (), offs_begin, + offs_end)); } break; @@ -3677,21 +3672,10 @@ MacroExpander::match_n_matches ( // Check if the amount of matches we got is valid: Is it more than the lower // bound and less than the higher bound? - auto result = hi_bound ? match_amount >= lo_bound && match_amount <= hi_bound - : match_amount >= lo_bound; - - // We can now set the amount to each fragment we matched in the substack - auto &stack_map = sub_stack.peek (); - for (auto &fragment_id : fragment_identifiers) - { - auto it = stack_map.find (fragment_id); - - rust_assert (it != stack_map.end ()); + bool did_meet_lo_bound = match_amount >= lo_bound; + bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true; - it->second.set_match_amount (match_amount); - } - - return result; + return did_meet_lo_bound && did_meet_hi_bound; } bool @@ -3733,14 +3717,41 @@ MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser, rust_debug_loc (rep.get_match_locus (), "%s matched %lu times", res ? 
"successfully" : "unsuccessfully", match_amount); + // We can now set the amount to each fragment we matched in the substack + auto &stack_map = sub_stack.peek (); + for (auto &match : rep.get_matches ()) + { + if (match->get_macro_match_type () + == AST::MacroMatch::MacroMatchType::Fragment) + { + auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ()); + auto it = stack_map.find (fragment->get_ident ()); + + // If we can't find the fragment, but the result was valid, then + // it's a zero-matched fragment and we can insert it + if (it == stack_map.end ()) + { + sub_stack.insert_fragment ( + MatchedFragment::zero (fragment->get_ident ())); + } + else + { + // We can just set the repetition amount on the first match + // FIXME: Make this more ergonomic and similar to what we fetch + // in `substitute_repetition` + it->second[0].set_match_amount (match_amount); + } + } + } + return res; } AST::ASTFragment MacroExpander::transcribe_rule ( AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree, - std::map<std::string, MatchedFragment> &matched_fragments, bool semicolon, - ContextType ctx) + std::map<std::string, std::vector<MatchedFragment>> &matched_fragments, + bool semicolon, ContextType ctx) { // we can manipulate the token tree to substitute the dollar identifiers so // that when we call parse its already substituted for us @@ -3875,10 +3886,192 @@ MacroExpander::transcribe_rule ( } std::vector<std::unique_ptr<AST::Token>> +MacroExpander::substitute_metavar ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::map<std::string, std::vector<MatchedFragment>> &fragments, + std::unique_ptr<AST::Token> &metavar) +{ + auto metavar_name = metavar->get_str (); + + std::vector<std::unique_ptr<AST::Token>> expanded; + auto it = fragments.find (metavar_name); + if (it == fragments.end ()) + { + // Return a copy of the original token + expanded.push_back (metavar->clone_token ()); + } + else + { + // Replace + // We only care about the vector 
when expanding repetitions. Just access + // the first element of the vector. + // FIXME: Clean this up so it makes more sense + auto &frag = it->second[0]; + for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end; + offs++) + { + auto &tok = input.at (offs); + expanded.push_back (tok->clone_token ()); + } + } + + return expanded; +} + +std::vector<std::unique_ptr<AST::Token>> +MacroExpander::substitute_repetition ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::vector<std::unique_ptr<AST::Token>> &macro, + std::map<std::string, std::vector<MatchedFragment>> &fragments, + size_t pattern_start, size_t pattern_end) +{ + rust_assert (pattern_end < macro.size ()); + + rust_debug ("pattern start: %lu", pattern_start); + rust_debug ("pattern end: %lu", pattern_end); + + std::vector<std::unique_ptr<AST::Token>> expanded; + + // Find the first fragment and get the amount of repetitions that we should + // perform + size_t repeat_amount = 0; + for (size_t i = pattern_start; i < pattern_end; i++) + { + if (macro.at (i)->get_id () == DOLLAR_SIGN) + { + auto &frag_token = macro.at (i + 1); + if (frag_token->get_id () == IDENTIFIER) + { + auto it = fragments.find (frag_token->get_str ()); + if (it == fragments.end ()) + { + // If the repetition is not anything we know (ie no declared + // metavars, or metavars which aren't present in the + // fragment), we can just error out. No need to paste the + // tokens as if nothing had happened. + rust_error_at (frag_token->get_locus (), + "metavar %s used in repetition does not exist", + frag_token->get_str ().c_str ()); + // FIXME: + return expanded; + } + + // FIXME: Refactor, ugly + repeat_amount = it->second[0].match_amount; + } + } + } + + rust_debug ("repetition amount to use: %lu", repeat_amount); + std::vector<std::unique_ptr<AST::Token>> new_macro; + + // We want to generate a "new macro" to substitute with. 
This new macro + // should contain only the tokens inside the pattern + for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++) + new_macro.emplace_back (macro.at (tok_idx)->clone_token ()); + + // Then, we want to create a subset of the matches so that + // `substitute_tokens()` can only see one fragment per metavar. Let's say we + // have the following user input: (1 145 'h') + // on the following match arm: ($($lit:literal)*) + // which causes the following matches: { "lit": [1, 145, 'h'] } + // + // The pattern (new_macro) is `$lit:literal` + // The first time we expand it, we want $lit to have the following token: 1 + // The second time, 145 + // The third and final time, 'h' + // + // In order to do so we must create "sub maps", which only contain parts of + // the original matches + // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ] + // + // and give them to `substitute_tokens` one by one. + + for (size_t i = 0; i < repeat_amount; i++) + { + std::map<std::string, std::vector<MatchedFragment>> sub_map; + for (auto &kv_match : fragments) + { + std::vector<MatchedFragment> sub_vec; + sub_vec.emplace_back (kv_match.second[i]); + + sub_map.insert ({kv_match.first, sub_vec}); + } + + auto new_tokens = substitute_tokens (input, new_macro, sub_map); + + for (auto &new_token : new_tokens) + expanded.emplace_back (new_token->clone_token ()); + } + + // FIXME: We also need to make sure that all subsequent fragments + // contain the same amount of repetitions as the first one + + return expanded; +} + +std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t> +MacroExpander::substitute_token ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::vector<std::unique_ptr<AST::Token>> &macro, + std::map<std::string, std::vector<MatchedFragment>> &fragments, + size_t token_idx) +{ + auto &token = macro.at (token_idx); + switch (token->get_id ()) + { + case IDENTIFIER: + rust_debug ("expanding metavar: %s", token->get_str ().c_str ()); + return 
{substitute_metavar (input, fragments, token), 1}; + case LEFT_PAREN: { + // We need to parse up until the closing delimiter and expand this + // fragment->n times. + rust_debug ("expanding repetition"); + std::vector<std::unique_ptr<AST::Token>> repetition_pattern; + size_t pattern_start = token_idx + 1; + size_t pattern_end = pattern_start; + for (; pattern_end < macro.size () + && macro.at (pattern_end)->get_id () != RIGHT_PAREN; + pattern_end++) + ; + + // FIXME: This skips whitespaces... Is that okay?? + // FIXME: Is there any existing parsing function that allows us to parse + // a macro pattern? + + // FIXME: Add error handling in the case we haven't found a matching + // closing delimiter + + // FIXME: We need to parse the repetition token now + + return { + substitute_repetition (input, macro, fragments, pattern_start, + pattern_end), + // + 2 for the opening and closing parentheses which are mandatory + // + 1 for the repetitor (+, *, ?) + pattern_end - pattern_start + 3}; + } + // TODO: We need to check if the $ was alone. In that case, do + // not error out: Simply act as if there was an empty identifier + // with no associated fragment and paste the dollar sign in the + // transcription. Unsure how to do that since we always have at + // least the closing curly brace after an empty $... + default: + rust_error_at (token->get_locus (), + "unexpected token in macro transcribe: expected " + "%<(%> or identifier after %<$%>, got %<%s%>", + get_token_description (token->get_id ())); + } + + // FIXME: gcc_unreachable() error case? 
+ return {std::vector<std::unique_ptr<AST::Token>> (), 0}; +} + +std::vector<std::unique_ptr<AST::Token>> MacroExpander::substitute_tokens ( std::vector<std::unique_ptr<AST::Token>> &input, std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, MatchedFragment> &fragments) + std::map<std::string, std::vector<MatchedFragment>> &fragments) { std::vector<std::unique_ptr<AST::Token>> replaced_tokens; @@ -3887,54 +4080,20 @@ MacroExpander::substitute_tokens ( auto &tok = macro.at (i); if (tok->get_id () == DOLLAR_SIGN) { - std::vector<std::unique_ptr<AST::Token>> parsed_toks; + // Aaaaah, if only we had C++17 :) + // auto [expanded, tok_to_skip] = ... + auto p = substitute_token (input, macro, fragments, i + 1); + auto expanded = std::move (p.first); + auto tok_to_skip = p.second; - std::string ident; - for (size_t offs = i; i < macro.size (); offs++) - { - auto &tok = macro.at (offs); - if (tok->get_id () == DOLLAR_SIGN && offs == i) - { - parsed_toks.push_back (tok->clone_token ()); - } - else if (tok->get_id () == IDENTIFIER) - { - rust_assert (tok->as_string ().size () == 1); - ident.push_back (tok->as_string ().at (0)); - parsed_toks.push_back (tok->clone_token ()); - } - else - { - break; - } - } + i += tok_to_skip; - // lookup the ident - auto it = fragments.find (ident); - if (it == fragments.end ()) - { - // just leave the tokens in - for (auto &tok : parsed_toks) - { - replaced_tokens.push_back (tok->clone_token ()); - } - } - else - { - // replace - MatchedFragment &frag = it->second; - for (size_t offs = frag.token_offset_begin; - offs < frag.token_offset_end; offs++) - { - auto &tok = input.at (offs); - replaced_tokens.push_back (tok->clone_token ()); - } - } - i += parsed_toks.size () - 1; + for (auto &token : expanded) + replaced_tokens.emplace_back (token->clone_token ()); } else { - replaced_tokens.push_back (tok->clone_token ()); + replaced_tokens.emplace_back (tok->clone_token ()); } } diff --git a/gcc/rust/expand/rust-macro-expand.h 
b/gcc/rust/expand/rust-macro-expand.h index edb091d..eeafdb8 100644 --- a/gcc/rust/expand/rust-macro-expand.h +++ b/gcc/rust/expand/rust-macro-expand.h @@ -57,11 +57,21 @@ struct MatchedFragment size_t match_amount; MatchedFragment (std::string identifier, size_t token_offset_begin, - size_t token_offset_end, size_t match_amount = 0) + size_t token_offset_end, size_t match_amount = 1) : fragment_ident (identifier), token_offset_begin (token_offset_begin), token_offset_end (token_offset_end), match_amount (match_amount) {} + /** + * Create a valid fragment matched zero times. This is useful for repetitions + * which allow the absence of a fragment, such as * and ? + */ + static MatchedFragment zero (std::string identifier) + { + // We don't need offsets since there is "no match" + return MatchedFragment (identifier, 0, 0, 0); + } + std::string as_string () const { return fragment_ident + "=" + std::to_string (token_offset_begin) + ":" @@ -79,17 +89,38 @@ public: void push () { stack.push_back ({}); } - std::map<std::string, MatchedFragment> pop () + std::map<std::string, std::vector<MatchedFragment>> pop () { auto top = stack.back (); stack.pop_back (); return top; } - std::map<std::string, MatchedFragment> &peek () { return stack.back (); } + std::map<std::string, std::vector<MatchedFragment>> &peek () + { + return stack.back (); + } + + void insert_fragment (MatchedFragment fragment) + { + auto &current_map = stack.back (); + auto it = current_map.find (fragment.fragment_ident); + + if (it == current_map.end ()) + { + auto new_frags = std::vector<MatchedFragment> (); + new_frags.emplace_back (fragment); + current_map.insert ({fragment.fragment_ident, new_frags}); + } + else + { + auto &frags = it->second; + frags.emplace_back (fragment); + } + } private: - std::vector<std::map<std::string, MatchedFragment>> stack; + std::vector<std::map<std::string, std::vector<MatchedFragment>>> stack; }; // Object used to store shared data (between functions) for macro expansion. 
@@ -141,11 +172,10 @@ struct MacroExpander bool try_match_rule (AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree); - AST::ASTFragment - transcribe_rule (AST::MacroRule &match_rule, - AST::DelimTokenTree &invoc_token_tree, - std::map<std::string, MatchedFragment> &matched_fragments, - bool semicolon, ContextType ctx); + AST::ASTFragment transcribe_rule ( + AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree, + std::map<std::string, std::vector<MatchedFragment>> &matched_fragments, + bool semicolon, ContextType ctx); bool match_fragment (Parser<MacroInvocLexer> &parser, AST::MacroMatchFragment &fragment); @@ -183,10 +213,63 @@ struct MacroExpander size_t &match_amount, size_t lo_bound = 0, size_t hi_bound = 0); - static std::vector<std::unique_ptr<AST::Token>> - substitute_tokens (std::vector<std::unique_ptr<AST::Token>> &input, - std::vector<std::unique_ptr<AST::Token>> &macro, - std::map<std::string, MatchedFragment> &fragments); + /** + * Substitute a metavariable by its given fragment in a transcribing context, + * i.e. replacing $var with the associated fragment. 
+ * + * @param input Tokens given to the transcribing context + * @param fragments Fragments given to the macro substitution + * @param metavar Metavariable to try and replace + * + * @return A token containing the associated fragment expanded into tokens if + * any, or the cloned token if no fragment was associated + */ + static std::vector<std::unique_ptr<AST::Token>> substitute_metavar ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::map<std::string, std::vector<MatchedFragment>> &fragments, + std::unique_ptr<AST::Token> &metavar); + + /** + * Substitute a macro repetition by its given fragments + * + * @param input Tokens given to the transcribing context + * @param fragments Fragments given to the macro substitution + * @param pattern_start Start index of the pattern tokens + * @param pattern_end Index Amount of tokens in the pattern + * + * @return A vector containing the repeated pattern + */ + static std::vector<std::unique_ptr<AST::Token>> substitute_repetition ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::vector<std::unique_ptr<AST::Token>> &macro, + std::map<std::string, std::vector<MatchedFragment>> &fragments, + size_t pattern_start, size_t pattern_end); + + /** + * Substitute a given token by its appropriate representation + * + * @param macro Tokens used in the macro declaration + * @param input Tokens given to the transcribing context + * @param fragments Fragments given to the macro substitution + * @param token Current token to try and substitute + * + * @return A token containing the associated fragment expanded into tokens if + * any, or the cloned token if no fragment was associated, as well as the + * amount of tokens that should be skipped before the next invocation. 
Since + * this function may consume more than just one token, it is important to skip + * ahead of the input to avoid mis-substitutions + */ + static std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t> + substitute_token ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::vector<std::unique_ptr<AST::Token>> &macro, + std::map<std::string, std::vector<MatchedFragment>> &fragments, + size_t token_idx); + + static std::vector<std::unique_ptr<AST::Token>> substitute_tokens ( + std::vector<std::unique_ptr<AST::Token>> &input, + std::vector<std::unique_ptr<AST::Token>> &macro, + std::map<std::string, std::vector<MatchedFragment>> &fragments); void push_context (ContextType t) { context.push_back (t); } diff --git a/gcc/testsuite/rust/execute/torture/macros10.rs b/gcc/testsuite/rust/execute/torture/macros10.rs new file mode 100644 index 0000000..f1fc34e --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros10.rs @@ -0,0 +1,20 @@ +// { dg-output "18\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)*) => (0 $(+ $e)*) +} + +fn main() -> i32 { + // 1 + 2 + 15 => 18 + print_int(add_exprs!(1 2 15)); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros11.rs b/gcc/testsuite/rust/execute/torture/macros11.rs new file mode 100644 index 0000000..7ce7d80 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros11.rs @@ -0,0 +1,22 @@ +// { dg-output "2" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0"; + let s_p = s as *const str; + let c_p = s_p as *const i8; + unsafe { printf(c_p, value); } +} + +macro_rules! add_exprs { + ($($e:expr)?) => (0 $(+ $e)?) 
+} + +fn main() -> i32 { + // 2 + print_int(add_exprs!(2)); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros12.rs b/gcc/testsuite/rust/execute/torture/macros12.rs new file mode 100644 index 0000000..ff4a862 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros12.rs @@ -0,0 +1,20 @@ +// { dg-output "0\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)?) => (0 $(+ $e)?) +} + +fn main() -> i32 { + // 0 + print_int(add_exprs!()); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros13.rs b/gcc/testsuite/rust/execute/torture/macros13.rs new file mode 100644 index 0000000..af5dfe8 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros13.rs @@ -0,0 +1,20 @@ +// { dg-output "18\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)+) => (0 $(+ $e)+) +} + +fn main() -> i32 { + // 1 + 2 + 15 => 18 + print_int(add_exprs!(1 2 15)); + + 0 +} diff --git a/gcc/testsuite/rust/execute/torture/macros14.rs b/gcc/testsuite/rust/execute/torture/macros14.rs new file mode 100644 index 0000000..2dc95e3 --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/macros14.rs @@ -0,0 +1,20 @@ +// { dg-output "15\n" } +extern "C" { + fn printf(s: *const i8, ...); +} + +fn print_int(value: i32) { + let s = "%d\n\0" as *const str as *const i8; + printf(s, value); +} + +macro_rules! add_exprs { + ($($e:expr)*) => (15 $(+ $e)*) +} + +fn main() -> i32 { + // 15 + print_int(add_exprs!()); + + 0 +} |