diff options
Diffstat (limited to 'gcc/rust/lex')
| -rw-r--r-- | gcc/rust/lex/rust-lex.cc | 66 | ||||
| -rw-r--r-- | gcc/rust/lex/rust-lex.h | 3 | ||||
| -rw-r--r-- | gcc/rust/lex/rust-token.cc | 34 | ||||
| -rw-r--r-- | gcc/rust/lex/rust-token.h | 98 |
4 files changed, 92 insertions, 109 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 8490638..a99b5ed 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -21,7 +21,7 @@ #include "rust-lex.h" #include "rust-diagnostics.h" #include "rust-linemap.h" -#include "rust-session-manager.h" +#include "rust-edition.h" #include "safe-ctype.h" #include "cpplib.h" #include "rust-keyword-values.h" @@ -236,10 +236,11 @@ Lexer::dump_and_skip (int n) out << "<id="; out << tok->token_id_to_str (); - out << (tok->has_str () ? (std::string (", text=") + tok->get_str () - + std::string (", typehint=") - + std::string (tok->get_type_hint_str ())) - : "") + out << (tok->should_have_str () + ? (std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : "") << " "; out << Linemap::location_to_string (loc) << '\n'; } @@ -277,9 +278,7 @@ Lexer::classify_keyword (const std::string &str) // https://doc.rust-lang.org/reference/keywords.html#reserved-keywords // `try` is not a reserved keyword before 2018 - if (Session::get_instance ().options.get_edition () - == CompileOptions::Edition::E2015 - && id == TRY) + if (get_rust_edition () == Edition::E2015 && id == TRY) return IDENTIFIER; return id; @@ -1319,7 +1318,8 @@ Lexer::parse_escape (char opening_char) switch (current_char.value) { - case 'x': { + case 'x': + { auto hex_escape_pair = parse_partial_hex_escape (); long hexLong = hex_escape_pair.first; additional_length_offset += hex_escape_pair.second; @@ -1402,7 +1402,8 @@ Lexer::parse_utf8_escape () switch (current_char.value) { - case 'x': { + case 'x': + { auto hex_escape_pair = parse_partial_hex_escape (); long hexLong = hex_escape_pair.first; additional_length_offset += hex_escape_pair.second; @@ -1440,7 +1441,8 @@ Lexer::parse_utf8_escape () case '"': output_char = '"'; break; - case 'u': { + case 'u': + { auto unicode_escape_pair = parse_partial_unicode_escape (); output_char = unicode_escape_pair.first; additional_length_offset += unicode_escape_pair.second; @@ -1896,6 +1898,11 @@ Lexer::parse_raw_byte_string (location_t loc) break; } } + else if (current_char.is_eof ()) + { + rust_error_at (string_begin_locus, "unended raw byte string literal"); + return Token::make (END_OF_FILE, get_current_location ()); + } else if (current_char.value > 127) { rust_error_at (get_current_location (), @@ -1903,11 +1910,6 @@ Lexer::parse_raw_byte_string (location_t loc) current_char.as_string ().c_str ()); current_char = 0; } - else if (current_char.is_eof ()) - { - rust_error_at (string_begin_locus, "unended raw byte string literal"); - return Token::make (END_OF_FILE, get_current_location ()); - } length++; current_column++; @@ -2637,37 +2639,37 @@ void rust_input_source_test () { // ASCII - std::string src = u8"_abcde\tXYZ\v\f"; - std::vector<uint32_t> expected - = {'_', 'a', 'b', 'c', 'd', 'e', '\t', 'X', 'Y', 'Z', '\v', '\f'}; + std::string src = (const char *) u8"_abcde\tXYZ\v\f"; + std::vector<uint32_t> expected = {u'_', u'a', u'b', u'c', u'd', u'e', + u'\t', u'X', u'Y', u'Z', u'\v', u'\f'}; test_buffer_input_source (src, expected); // BOM - src = u8"\xef\xbb\xbfOK"; - expected = {'O', 'K'}; + src = (const char *) u8"\xef\xbb\xbfOK"; + expected = {u'O', u'K'}; test_buffer_input_source (src, expected); // Russian - src = u8"приве́т"; - expected = {L'п', - L'р', - L'и', - L'в', + src = (const char *) u8"приве́т"; + expected = {u'п', + u'р', + u'и', + u'в', 0x0435 /* CYRILLIC SMALL LETTER IE е */, 0x301 /* COMBINING ACUTE ACCENT ́ */, - L'т'}; + u'т'}; test_buffer_input_source (src, expected); - src = u8"❤️🦀"; + src = (const char *) u8"❤️🦀"; expected = {0x2764 /* HEAVY BLACK HEART */, - 0xfe0f /* VARIATION SELECTOR-16 */, L'🦀'}; + 0xfe0f /* VARIATION SELECTOR-16 */, U'🦀'}; test_buffer_input_source (src, expected); - src = u8"こんにちは"; - expected = {L'こ', L'ん', L'に', L'ち', L'は'}; + src = (const char *) u8"こんにちは"; + expected = {u'こ', u'ん', u'に', u'ち', u'は'}; test_file_input_source (src, expected); - src = u8"👮♂👩⚕"; + src = (const char *) u8"👮♂👩⚕"; expected = {0x1f46e /* POLICE OFFICER */, 0x200d /* ZERO WIDTH JOINER */, 0x2642 /* MALE SIGN */, 0x1f469 /* WOMAN */, diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index 10293e0..383ffac 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -263,8 +263,7 @@ private: #if CHECKING_P namespace selftest { -void -rust_input_source_test (); +void rust_input_source_test (); } // namespace selftest diff --git a/gcc/rust/lex/rust-token.cc b/gcc/rust/lex/rust-token.cc index 8493889..43682af 100644 --- a/gcc/rust/lex/rust-token.cc +++ b/gcc/rust/lex/rust-token.cc @@ -20,6 +20,7 @@ #include "rust-token.h" #include "rust-diagnostics.h" #include "rust-unicode.h" +#include "rust-ast.h" namespace Rust { // Hackily defined way to get token description for enum value using x-macros @@ -88,7 +89,8 @@ token_id_keyword_string (TokenId id) switch (id) { #define RS_TOKEN_KEYWORD_2015(id, str_ptr) \ - case id: { \ + case id: \ + { \ static const std::string str (str_ptr); \ return str; \ } \ @@ -176,29 +178,6 @@ nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str) return str; } -const std::string & -Token::get_str () const -{ - if (token_id_is_keyword (token_id)) - return token_id_keyword_string (token_id); - - // FIXME: attempt to return null again - // gcc_assert(str != NULL); - - // HACK: allow referencing an empty string - static const std::string empty = ""; - - if (str == NULL) - { - rust_error_at (get_locus (), - "attempted to get string for %qs, which has no string. " - "returning empty string instead", - get_token_description ()); - return empty; - } - return *str; -} - namespace { enum class Context { @@ -234,6 +213,13 @@ escape_special_chars (const std::string &source, Context ctx) } // namespace +TokenPtr +Token::make_identifier (const Identifier &ident) +{ + std::string str = ident; + return make_identifier (ident.get_locus (), std::move (str)); +} + std::string Token::as_string () const { diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h index aa2f308..71a7503 100644 --- a/gcc/rust/lex/rust-token.h +++ b/gcc/rust/lex/rust-token.h @@ -21,10 +21,14 @@ #include "rust-system.h" #include "rust-linemap.h" -#include "rust-make-unique.h" #include "rust-unicode.h" +#include "rust-diagnostics.h" namespace Rust { + +// used by Rust::Token::make_identifier +class Identifier; + // "Primitive core types" in Rust - the different int and float types, as well // as some others enum PrimitiveCoreType @@ -222,25 +226,20 @@ typedef std::shared_ptr<Token> TokenPtr; typedef std::shared_ptr<const Token> const_TokenPtr; // Hackily defined way to get token description for enum value using x-macros -const char * -get_token_description (TokenId id); +const char *get_token_description (TokenId id); /* Hackily defined way to get token description as a string for enum value using * x-macros */ -const char * -token_id_to_str (TokenId id); +const char *token_id_to_str (TokenId id); /* checks if a token is a keyword */ -bool -token_id_is_keyword (TokenId id); +bool token_id_is_keyword (TokenId id); /* gets the string associated with a keyword */ -const std::string & -token_id_keyword_string (TokenId id); +const std::string &token_id_keyword_string (TokenId id); // Get type hint description as a string. -const char * -get_type_hint_string (PrimitiveCoreType type); +const char *get_type_hint_string (PrimitiveCoreType type); /* Normalize string if a token is a identifier */ -std::string -nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str); +std::string nfc_normalize_token_string (location_t loc, TokenId id, + const std::string &str); // Represents a single token. Create using factory static methods. class Token @@ -251,7 +250,7 @@ private: // Token location. location_t locus; // Associated text (if any) of token. - std::unique_ptr<std::string> str; + std::string str; // TODO: maybe remove issues and just store std::string as value? /* Type hint for token based on lexer data (e.g. type suffix). Does not exist * for most tokens. */ @@ -259,23 +258,21 @@ private: // Token constructor from token id and location. Has a null string. Token (TokenId token_id, location_t location) - : token_id (token_id), locus (location), str (nullptr), - type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) {} // Token constructor from token id, location, and a string. - Token (TokenId token_id, location_t location, std::string &¶mStr) + Token (TokenId token_id, location_t location, std::string paramStr) : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) { // Normalize identifier tokens - str = Rust::make_unique<std::string> ( - nfc_normalize_token_string (location, token_id, paramStr)); + str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); } // Token constructor from token id, location, and a char. Token (TokenId token_id, location_t location, char paramChar) - : token_id (token_id), locus (location), - str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), str (1, paramChar), + type_hint (CORETYPE_UNKNOWN) { // Do not need to normalize 1byte char } @@ -285,19 +282,17 @@ private: : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) { // Normalize identifier tokens - str = Rust::make_unique<std::string> ( - nfc_normalize_token_string (location, token_id, - paramCodepoint.as_string ())); + str = nfc_normalize_token_string (location, token_id, + paramCodepoint.as_string ()); } // Token constructor from token id, location, a string, and type hint. - Token (TokenId token_id, location_t location, std::string &¶mStr, + Token (TokenId token_id, location_t location, std::string paramStr, PrimitiveCoreType parType) : token_id (token_id), locus (location), type_hint (parType) { // Normalize identifier tokens - str = Rust::make_unique<std::string> ( - nfc_normalize_token_string (location, token_id, paramStr)); + str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); } public: @@ -324,14 +319,16 @@ public: } // Makes and returns a new TokenPtr of type IDENTIFIER. - static TokenPtr make_identifier (location_t locus, std::string &&str) + static TokenPtr make_identifier (location_t locus, std::string str) { // return std::make_shared<Token> (IDENTIFIER, locus, str); return TokenPtr (new Token (IDENTIFIER, locus, std::move (str))); } + static TokenPtr make_identifier (const Identifier &ident); + // Makes and returns a new TokenPtr of type INT_LITERAL. - static TokenPtr make_int (location_t locus, std::string &&str, + static TokenPtr make_int (location_t locus, std::string str, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint); @@ -340,7 +337,7 @@ public: } // Makes and returns a new TokenPtr of type FLOAT_LITERAL. - static TokenPtr make_float (location_t locus, std::string &&str, + static TokenPtr make_float (location_t locus, std::string str, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint); @@ -349,7 +346,7 @@ public: } // Makes and returns a new TokenPtr of type STRING_LITERAL. - static TokenPtr make_string (location_t locus, std::string &&str) + static TokenPtr make_string (location_t locus, std::string str) { // return std::make_shared<Token> (STRING_LITERAL, locus, str, // CORETYPE_STR); @@ -372,32 +369,32 @@ public: } // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix). - static TokenPtr make_byte_string (location_t locus, std::string &&str) + static TokenPtr make_byte_string (location_t locus, std::string str) { // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str); return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str))); } // Makes and returns a new TokenPtr of type RAW_STRING_LITERAL. - static TokenPtr make_raw_string (location_t locus, std::string &&str) + static TokenPtr make_raw_string (location_t locus, std::string str) { return TokenPtr (new Token (RAW_STRING_LITERAL, locus, std::move (str))); } // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT. - static TokenPtr make_inner_doc_comment (location_t locus, std::string &&str) + static TokenPtr make_inner_doc_comment (location_t locus, std::string str) { return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str))); } // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT. - static TokenPtr make_outer_doc_comment (location_t locus, std::string &&str) + static TokenPtr make_outer_doc_comment (location_t locus, std::string str) { return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str))); } // Makes and returns a new TokenPtr of type LIFETIME. - static TokenPtr make_lifetime (location_t locus, std::string &&str) + static TokenPtr make_lifetime (location_t locus, std::string str) { // return std::make_shared<Token> (LIFETIME, locus, str); return TokenPtr (new Token (LIFETIME, locus, std::move (str))); @@ -413,16 +410,18 @@ public: void set_locus (location_t locus) { this->locus = locus; } // Gets string description of the token. - const std::string & - get_str () const; /*{ -// FIXME: put in header again when fix null problem -//gcc_assert(str != nullptr); -if (str == nullptr) { -error_at(get_locus(), "attempted to get string for '%s', which has no string. -returning empty string instead.", get_token_description()); return ""; -} -return *str; -}*/ + const std::string &get_str () const + { + if (token_id_is_keyword (token_id)) + return token_id_keyword_string (token_id); + + if (!should_have_str ()) + rust_internal_error_at ( + locus, "attempting to get string for %qs, which should have no string", + get_token_description ()); + + return str; + } // Gets token's type hint info. PrimitiveCoreType get_type_hint () const @@ -464,14 +463,11 @@ return *str; } } - /* Returns whether the token actually has a string (regardless of whether it - * should or not). */ - bool has_str () const { return str != nullptr; } - // Returns whether the token should have a string. bool should_have_str () const { - return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME; + return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME + || token_id == INNER_DOC_COMMENT || token_id == OUTER_DOC_COMMENT; } // Returns whether the token is a pure decimal int literal |
