| author | Pierre-Emmanuel Patry <pierre-emmanuel.patry@embecosm.com> | 2023-10-17 13:24:54 +0200 |
|---|---|---|
| committer | CohenArthur <arthur.cohen@embecosm.com> | 2023-11-06 10:08:09 +0000 |
| commit | 7de72c6684d74cf6a2d3fa00a2426eab3e949a60 (patch) | |
| tree | 2b54b4842ed469f4568d342f00e4f7a5c131d27c | |
| parent | 0a2f8fc8f28066f05517a57bfa43551e238f01ac (diff) | |
| download | gcc-7de72c6684d74cf6a2d3fa00a2426eab3e949a60.zip, gcc-7de72c6684d74cf6a2d3fa00a2426eab3e949a60.tar.gz, gcc-7de72c6684d74cf6a2d3fa00a2426eab3e949a60.tar.bz2 | |
Fix token lexed as a float literal
In some cases the lexer cannot tell a float literal apart from a tuple
index, so the ambiguity has to be resolved during parsing, where the
surrounding context is known.
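
To make the ambiguity concrete, here is a minimal, self-contained Rust example (the variable names are invented for illustration): after the first dot, the characters `1.0` look like a float literal to a greedy lexer, yet they are really two tuple indices.

```rust
fn main() {
    let nested = ("a", ("b", "c"));
    // `.1` selects the inner tuple ("b", "c"), `.0` then selects "b",
    // even though `1.0` on its own would lex as a float literal.
    let inner = nested.1.0;
    assert_eq!(inner, "b");
}
```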
gcc/rust/ChangeLog:
* expand/rust-macro-invoc-lexer.cc (MacroInvocLexer::split_current_token):
Add implementation for multiple token split.
* expand/rust-macro-invoc-lexer.h: Add function prototype.
* expand/rust-proc-macro-invoc-lexer.cc (ProcMacroInvocLexer::split_current_token):
Add implementation for multiple token split for procedural macros.
* expand/rust-proc-macro-invoc-lexer.h: Add function prototype.
* lex/rust-lex.cc (Lexer::split_current_token): Add function to split a
token in multiple other tokens.
* lex/rust-lex.h: Add function prototype for split_current_token.
* parse/rust-parse-impl.h (Parser::left_denotation): Handle a tuple index
wrongly lexed as a float literal.
Signed-off-by: Pierre-Emmanuel Patry <pierre-emmanuel.patry@embecosm.com>
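
The `split_current_token` overloads listed in the ChangeLog all follow the same pattern: drop the mis-lexed token from the token stream and splice an arbitrary number of replacement tokens in its place. A rough Rust analogy of that operation, using plain strings instead of compiler tokens (names and types are invented for illustration):

```rust
// Hypothetical sketch: replace the token at `offs` with several new tokens.
fn split_current_token(stream: &mut Vec<String>, offs: usize, new_tokens: Vec<String>) {
    assert!(!new_tokens.is_empty());
    // Drop the mis-lexed token and splice the replacements in its place.
    stream.splice(offs..offs + 1, new_tokens);
}

fn main() {
    let mut stream: Vec<String> = vec!["x".into(), ".".into(), "1.0".into()];
    split_current_token(&mut stream, 2, vec!["1".into(), ".".into(), "0".into()]);
    assert_eq!(stream, ["x", ".", "1", ".", "0"]);
}
```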
| -rw-r--r-- | gcc/rust/expand/rust-macro-invoc-lexer.cc | 16 |
| -rw-r--r-- | gcc/rust/expand/rust-macro-invoc-lexer.h | 2 |
| -rw-r--r-- | gcc/rust/expand/rust-proc-macro-invoc-lexer.cc | 15 |
| -rw-r--r-- | gcc/rust/expand/rust-proc-macro-invoc-lexer.h | 2 |
| -rw-r--r-- | gcc/rust/lex/rust-lex.cc | 12 |
| -rw-r--r-- | gcc/rust/lex/rust-lex.h | 2 |
| -rw-r--r-- | gcc/rust/parse/rust-parse-impl.h | 22 |

7 files changed, 71 insertions(+), 0 deletions(-)
diff --git a/gcc/rust/expand/rust-macro-invoc-lexer.cc b/gcc/rust/expand/rust-macro-invoc-lexer.cc
index aa1e1ff..003c87d 100644
--- a/gcc/rust/expand/rust-macro-invoc-lexer.cc
+++ b/gcc/rust/expand/rust-macro-invoc-lexer.cc
@@ -49,6 +49,22 @@ MacroInvocLexer::split_current_token (TokenId new_left, TokenId new_right)
     std::unique_ptr<AST::Token> (new AST::Token (l_tok)));
 }
 
+void
+MacroInvocLexer::split_current_token (std::vector<TokenPtr> new_tokens)
+{
+  rust_assert (new_tokens.size () > 0);
+
+  auto current_pos = token_stream.begin () + offs;
+
+  token_stream.erase (current_pos);
+
+  for (size_t i = 1; i < new_tokens.size (); i++)
+    {
+      token_stream.insert (current_pos + i, std::unique_ptr<AST::Token> (
+                                              new AST::Token (new_tokens[i])));
+    }
+}
+
 std::vector<std::unique_ptr<AST::Token>>
 MacroInvocLexer::get_token_slice (size_t start_idx, size_t end_idx) const
 {
diff --git a/gcc/rust/expand/rust-macro-invoc-lexer.h b/gcc/rust/expand/rust-macro-invoc-lexer.h
index dd33251..de279d8 100644
--- a/gcc/rust/expand/rust-macro-invoc-lexer.h
+++ b/gcc/rust/expand/rust-macro-invoc-lexer.h
@@ -67,6 +67,8 @@ public:
   // this will only work with "simple" tokens like punctuation.
   void split_current_token (TokenId new_left, TokenId new_right);
 
+  void split_current_token (std::vector<TokenPtr> new_tokens);
+
   std::vector<std::unique_ptr<AST::Token>>
   get_token_slice (size_t start_idx, size_t end_idx) const;
 };
diff --git a/gcc/rust/expand/rust-proc-macro-invoc-lexer.cc b/gcc/rust/expand/rust-proc-macro-invoc-lexer.cc
index 5990dec..1c370c1 100644
--- a/gcc/rust/expand/rust-proc-macro-invoc-lexer.cc
+++ b/gcc/rust/expand/rust-proc-macro-invoc-lexer.cc
@@ -47,4 +47,19 @@ ProcMacroInvocLexer::split_current_token (TokenId new_left, TokenId new_right)
   token_stream.insert (current_pos, r_tok);
 }
 
+void
+ProcMacroInvocLexer::split_current_token (std::vector<TokenPtr> new_tokens)
+{
+  rust_assert (new_tokens.size () > 0);
+
+  auto current_pos = token_stream.begin () + offs;
+
+  token_stream.erase (current_pos);
+
+  for (size_t i = 1; i < new_tokens.size (); i++)
+    {
+      token_stream.insert (current_pos + i, new_tokens[i]);
+    }
+}
+
 } // namespace Rust
diff --git a/gcc/rust/expand/rust-proc-macro-invoc-lexer.h b/gcc/rust/expand/rust-proc-macro-invoc-lexer.h
index 5a11a4c..94eb6d5 100644
--- a/gcc/rust/expand/rust-proc-macro-invoc-lexer.h
+++ b/gcc/rust/expand/rust-proc-macro-invoc-lexer.h
@@ -40,6 +40,8 @@ public:
   // closes (i.e. T<U<X>> where >> is wrongly lexed as one token). Note that
   // this will only work with "simple" tokens like punctuation.
   void split_current_token (TokenId new_left, TokenId new_right);
+
+  void split_current_token (std::vector<TokenPtr> new_tokens);
 };
 
 } // namespace Rust
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 0beb140..7e8607a 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -2530,6 +2530,18 @@ Lexer::split_current_token (TokenId new_left, TokenId new_right)
 }
 
 void
+Lexer::split_current_token (std::vector<TokenPtr> new_tokens)
+{
+  rust_assert (new_tokens.size () > 0);
+  token_queue.replace_current_value (new_tokens[0]);
+
+  for (size_t i = 1; i < new_tokens.size (); i++)
+    {
+      token_queue.insert (i, new_tokens[i]);
+    }
+}
+
+void
 Lexer::start_line (int current_line, int current_column)
 {
   if (line_map)
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 6a29c0a..c5c7239 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -202,6 +202,8 @@ public:
    * this will only work with "simple" tokens like punctuation. */
   void split_current_token (TokenId new_left, TokenId new_right);
 
+  void split_current_token (std::vector<TokenPtr> new_tokens);
+
   Linemap *get_line_map () { return line_map; }
 
   std::string get_filename () { return std::string (input.get_filename ()); }
diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h
index 7b5fb83..d509cdf 100644
--- a/gcc/rust/parse/rust-parse-impl.h
+++ b/gcc/rust/parse/rust-parse-impl.h
@@ -12895,6 +12895,28 @@ Parser<ManagedTokenSource>::left_denotation (const_TokenPtr tok,
                                        std::move (outer_attrs), restrictions);
           }
+        else if (next_tok->get_id () == FLOAT_LITERAL)
+          {
+            // Lexer has misidentified a tuple index as a float literal
+            // eg: `(x, (y, z)).1.0` -> 1.0 has been identified as a float
+            // literal. This means we should split it into three new separate
+            // tokens, the first tuple index, the dot and the second tuple
+            // index.
+            auto current_loc = next_tok->get_locus ();
+            auto str = next_tok->get_str ();
+            auto dot_pos = str.find (".");
+            auto prefix = str.substr (0, dot_pos);
+            auto suffix = str.substr (dot_pos + 1);
+            lexer.split_current_token (
+              {Token::make_int (current_loc, std::move (prefix),
+                                CORETYPE_PURE_DECIMAL),
+               Token::make (DOT, current_loc + 1),
+               Token::make_int (current_loc + 2, std::move (suffix),
+                                CORETYPE_PURE_DECIMAL)});
+            return parse_tuple_index_expr (tok, std::move (left),
+                                           std::move (outer_attrs),
+                                           restrictions);
+          }
         else if (next_tok->get_id () == IDENTIFIER
                  && lexer.peek_token (1)->get_id () != LEFT_PAREN
                  && lexer.peek_token (1)->get_id () != SCOPE_RESOLUTION)
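
For reference, a compiler-independent sketch of the parser-side decision made in `Parser::left_denotation` above: the text of the mis-lexed float literal is split at the dot, and each half must be a plain decimal integer before it can become a tuple-index token. `split_tuple_index` is a hypothetical helper for illustration, not GCC code.

```rust
// Hypothetical stand-alone sketch of the splitting idea, assuming the
// mis-lexed token text is available as a string such as "1.0".
fn split_tuple_index(float_text: &str) -> Option<(u32, u32)> {
    // "1.0" -> ("1", "0"); a form without a dot (e.g. "1e3") cannot be split.
    let (prefix, suffix) = float_text.split_once('.')?;
    // Both halves must be plain decimal integers to act as tuple indices.
    Some((prefix.parse().ok()?, suffix.parse().ok()?))
}

fn main() {
    // `(x, (y, z)).1.0` from the comment above: "1.0" becomes indices 1 and 0.
    assert_eq!(split_tuple_index("1.0"), Some((1, 0)));
    assert_eq!(split_tuple_index("1e3"), None);
}
```

The actual parser builds two integer tokens and a `DOT` token from the two halves and feeds them back through `lexer.split_current_token`, as shown in the diff.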