about summary refs log tree commit diff
path: root/gcc/rust/lex
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/rust/lex')
-rw-r--r-- gcc/rust/lex/rust-lex.cc 66
-rw-r--r-- gcc/rust/lex/rust-lex.h 3
-rw-r--r-- gcc/rust/lex/rust-token.cc 34
-rw-r--r-- gcc/rust/lex/rust-token.h 98
4 files changed, 92 insertions, 109 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 8490638..a99b5ed 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -21,7 +21,7 @@
#include "rust-lex.h"
#include "rust-diagnostics.h"
#include "rust-linemap.h"
-#include "rust-session-manager.h"
+#include "rust-edition.h"
#include "safe-ctype.h"
#include "cpplib.h"
#include "rust-keyword-values.h"
@@ -236,10 +236,11 @@ Lexer::dump_and_skip (int n)
out << "<id=";
out << tok->token_id_to_str ();
- out << (tok->has_str () ? (std::string (", text=") + tok->get_str ()
- + std::string (", typehint=")
- + std::string (tok->get_type_hint_str ()))
- : "")
+ out << (tok->should_have_str ()
+ ? (std::string (", text=") + tok->get_str ()
+ + std::string (", typehint=")
+ + std::string (tok->get_type_hint_str ()))
+ : "")
<< " ";
out << Linemap::location_to_string (loc) << '\n';
}
@@ -277,9 +278,7 @@ Lexer::classify_keyword (const std::string &str)
// https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
// `try` is not a reserved keyword before 2018
- if (Session::get_instance ().options.get_edition ()
- == CompileOptions::Edition::E2015
- && id == TRY)
+ if (get_rust_edition () == Edition::E2015 && id == TRY)
return IDENTIFIER;
return id;
@@ -1319,7 +1318,8 @@ Lexer::parse_escape (char opening_char)
switch (current_char.value)
{
- case 'x': {
+ case 'x':
+ {
auto hex_escape_pair = parse_partial_hex_escape ();
long hexLong = hex_escape_pair.first;
additional_length_offset += hex_escape_pair.second;
@@ -1402,7 +1402,8 @@ Lexer::parse_utf8_escape ()
switch (current_char.value)
{
- case 'x': {
+ case 'x':
+ {
auto hex_escape_pair = parse_partial_hex_escape ();
long hexLong = hex_escape_pair.first;
additional_length_offset += hex_escape_pair.second;
@@ -1440,7 +1441,8 @@ Lexer::parse_utf8_escape ()
case '"':
output_char = '"';
break;
- case 'u': {
+ case 'u':
+ {
auto unicode_escape_pair = parse_partial_unicode_escape ();
output_char = unicode_escape_pair.first;
additional_length_offset += unicode_escape_pair.second;
@@ -1896,6 +1898,11 @@ Lexer::parse_raw_byte_string (location_t loc)
break;
}
}
+ else if (current_char.is_eof ())
+ {
+ rust_error_at (string_begin_locus, "unended raw byte string literal");
+ return Token::make (END_OF_FILE, get_current_location ());
+ }
else if (current_char.value > 127)
{
rust_error_at (get_current_location (),
@@ -1903,11 +1910,6 @@ Lexer::parse_raw_byte_string (location_t loc)
current_char.as_string ().c_str ());
current_char = 0;
}
- else if (current_char.is_eof ())
- {
- rust_error_at (string_begin_locus, "unended raw byte string literal");
- return Token::make (END_OF_FILE, get_current_location ());
- }
length++;
current_column++;
@@ -2637,37 +2639,37 @@ void
rust_input_source_test ()
{
// ASCII
- std::string src = u8"_abcde\tXYZ\v\f";
- std::vector<uint32_t> expected
- = {'_', 'a', 'b', 'c', 'd', 'e', '\t', 'X', 'Y', 'Z', '\v', '\f'};
+ std::string src = (const char *) u8"_abcde\tXYZ\v\f";
+ std::vector<uint32_t> expected = {u'_', u'a', u'b', u'c', u'd', u'e',
+ u'\t', u'X', u'Y', u'Z', u'\v', u'\f'};
test_buffer_input_source (src, expected);
// BOM
- src = u8"\xef\xbb\xbfOK";
- expected = {'O', 'K'};
+ src = (const char *) u8"\xef\xbb\xbfOK";
+ expected = {u'O', u'K'};
test_buffer_input_source (src, expected);
// Russian
- src = u8"приве́т";
- expected = {L'п',
- L'р',
- L'и',
- L'в',
+ src = (const char *) u8"приве́т";
+ expected = {u'п',
+ u'р',
+ u'и',
+ u'в',
0x0435 /* CYRILLIC SMALL LETTER IE е */,
0x301 /* COMBINING ACUTE ACCENT ́ */,
- L'т'};
+ u'т'};
test_buffer_input_source (src, expected);
- src = u8"❤️🦀";
+ src = (const char *) u8"❤️🦀";
expected = {0x2764 /* HEAVY BLACK HEART */,
- 0xfe0f /* VARIATION SELECTOR-16 */, L'🦀'};
+ 0xfe0f /* VARIATION SELECTOR-16 */, U'🦀'};
test_buffer_input_source (src, expected);
- src = u8"こんにちは";
- expected = {L'こ', L'ん', L'に', L'ち', L'は'};
+ src = (const char *) u8"こんにちは";
+ expected = {u'こ', u'ん', u'に', u'ち', u'は'};
test_file_input_source (src, expected);
- src = u8"👮‍♂👩‍⚕";
+ src = (const char *) u8"👮‍♂👩‍⚕";
expected
= {0x1f46e /* POLICE OFFICER */, 0x200d /* ZERO WIDTH JOINER */,
0x2642 /* MALE SIGN */, 0x1f469 /* WOMAN */,
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 10293e0..383ffac 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -263,8 +263,7 @@ private:
#if CHECKING_P
namespace selftest {
-void
-rust_input_source_test ();
+void rust_input_source_test ();
} // namespace selftest
diff --git a/gcc/rust/lex/rust-token.cc b/gcc/rust/lex/rust-token.cc
index 8493889..43682af 100644
--- a/gcc/rust/lex/rust-token.cc
+++ b/gcc/rust/lex/rust-token.cc
@@ -20,6 +20,7 @@
#include "rust-token.h"
#include "rust-diagnostics.h"
#include "rust-unicode.h"
+#include "rust-ast.h"
namespace Rust {
// Hackily defined way to get token description for enum value using x-macros
@@ -88,7 +89,8 @@ token_id_keyword_string (TokenId id)
switch (id)
{
#define RS_TOKEN_KEYWORD_2015(id, str_ptr) \
- case id: { \
+ case id: \
+ { \
static const std::string str (str_ptr); \
return str; \
} \
@@ -176,29 +178,6 @@ nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str)
return str;
}
-const std::string &
-Token::get_str () const
-{
- if (token_id_is_keyword (token_id))
- return token_id_keyword_string (token_id);
-
- // FIXME: attempt to return null again
- // gcc_assert(str != NULL);
-
- // HACK: allow referencing an empty string
- static const std::string empty = "";
-
- if (str == NULL)
- {
- rust_error_at (get_locus (),
- "attempted to get string for %qs, which has no string. "
- "returning empty string instead",
- get_token_description ());
- return empty;
- }
- return *str;
-}
-
namespace {
enum class Context
{
@@ -234,6 +213,13 @@ escape_special_chars (const std::string &source, Context ctx)
} // namespace
+TokenPtr
+Token::make_identifier (const Identifier &ident)
+{
+ std::string str = ident;
+ return make_identifier (ident.get_locus (), std::move (str));
+}
+
std::string
Token::as_string () const
{
diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h
index aa2f308..71a7503 100644
--- a/gcc/rust/lex/rust-token.h
+++ b/gcc/rust/lex/rust-token.h
@@ -21,10 +21,14 @@
#include "rust-system.h"
#include "rust-linemap.h"
-#include "rust-make-unique.h"
#include "rust-unicode.h"
+#include "rust-diagnostics.h"
namespace Rust {
+
+// used by Rust::Token::make_identifier
+class Identifier;
+
// "Primitive core types" in Rust - the different int and float types, as well
// as some others
enum PrimitiveCoreType
@@ -222,25 +226,20 @@ typedef std::shared_ptr<Token> TokenPtr;
typedef std::shared_ptr<const Token> const_TokenPtr;
// Hackily defined way to get token description for enum value using x-macros
-const char *
-get_token_description (TokenId id);
+const char *get_token_description (TokenId id);
/* Hackily defined way to get token description as a string for enum value using
* x-macros */
-const char *
-token_id_to_str (TokenId id);
+const char *token_id_to_str (TokenId id);
/* checks if a token is a keyword */
-bool
-token_id_is_keyword (TokenId id);
+bool token_id_is_keyword (TokenId id);
/* gets the string associated with a keyword */
-const std::string &
-token_id_keyword_string (TokenId id);
+const std::string &token_id_keyword_string (TokenId id);
// Get type hint description as a string.
-const char *
-get_type_hint_string (PrimitiveCoreType type);
+const char *get_type_hint_string (PrimitiveCoreType type);
/* Normalize string if a token is a identifier */
-std::string
-nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str);
+std::string nfc_normalize_token_string (location_t loc, TokenId id,
+ const std::string &str);
// Represents a single token. Create using factory static methods.
class Token
@@ -251,7 +250,7 @@ private:
// Token location.
location_t locus;
// Associated text (if any) of token.
- std::unique_ptr<std::string> str;
+ std::string str;
// TODO: maybe remove issues and just store std::string as value?
/* Type hint for token based on lexer data (e.g. type suffix). Does not exist
* for most tokens. */
@@ -259,23 +258,21 @@ private:
// Token constructor from token id and location. Has a null string.
Token (TokenId token_id, location_t location)
- : token_id (token_id), locus (location), str (nullptr),
- type_hint (CORETYPE_UNKNOWN)
+ : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
{}
// Token constructor from token id, location, and a string.
- Token (TokenId token_id, location_t location, std::string &&paramStr)
+ Token (TokenId token_id, location_t location, std::string paramStr)
: token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
{
// Normalize identifier tokens
- str = Rust::make_unique<std::string> (
- nfc_normalize_token_string (location, token_id, paramStr));
+ str = nfc_normalize_token_string (location, token_id, std::move (paramStr));
}
// Token constructor from token id, location, and a char.
Token (TokenId token_id, location_t location, char paramChar)
- : token_id (token_id), locus (location),
- str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN)
+ : token_id (token_id), locus (location), str (1, paramChar),
+ type_hint (CORETYPE_UNKNOWN)
{
// Do not need to normalize 1byte char
}
@@ -285,19 +282,17 @@ private:
: token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
{
// Normalize identifier tokens
- str = Rust::make_unique<std::string> (
- nfc_normalize_token_string (location, token_id,
- paramCodepoint.as_string ()));
+ str = nfc_normalize_token_string (location, token_id,
+ paramCodepoint.as_string ());
}
// Token constructor from token id, location, a string, and type hint.
- Token (TokenId token_id, location_t location, std::string &&paramStr,
+ Token (TokenId token_id, location_t location, std::string paramStr,
PrimitiveCoreType parType)
: token_id (token_id), locus (location), type_hint (parType)
{
// Normalize identifier tokens
- str = Rust::make_unique<std::string> (
- nfc_normalize_token_string (location, token_id, paramStr));
+ str = nfc_normalize_token_string (location, token_id, std::move (paramStr));
}
public:
@@ -324,14 +319,16 @@ public:
}
// Makes and returns a new TokenPtr of type IDENTIFIER.
- static TokenPtr make_identifier (location_t locus, std::string &&str)
+ static TokenPtr make_identifier (location_t locus, std::string str)
{
// return std::make_shared<Token> (IDENTIFIER, locus, str);
return TokenPtr (new Token (IDENTIFIER, locus, std::move (str)));
}
+ static TokenPtr make_identifier (const Identifier &ident);
+
// Makes and returns a new TokenPtr of type INT_LITERAL.
- static TokenPtr make_int (location_t locus, std::string &&str,
+ static TokenPtr make_int (location_t locus, std::string str,
PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
{
// return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint);
@@ -340,7 +337,7 @@ public:
}
// Makes and returns a new TokenPtr of type FLOAT_LITERAL.
- static TokenPtr make_float (location_t locus, std::string &&str,
+ static TokenPtr make_float (location_t locus, std::string str,
PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
{
// return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint);
@@ -349,7 +346,7 @@ public:
}
// Makes and returns a new TokenPtr of type STRING_LITERAL.
- static TokenPtr make_string (location_t locus, std::string &&str)
+ static TokenPtr make_string (location_t locus, std::string str)
{
// return std::make_shared<Token> (STRING_LITERAL, locus, str,
// CORETYPE_STR);
@@ -372,32 +369,32 @@ public:
}
// Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix).
- static TokenPtr make_byte_string (location_t locus, std::string &&str)
+ static TokenPtr make_byte_string (location_t locus, std::string str)
{
// return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str);
return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str)));
}
// Makes and returns a new TokenPtr of type RAW_STRING_LITERAL.
- static TokenPtr make_raw_string (location_t locus, std::string &&str)
+ static TokenPtr make_raw_string (location_t locus, std::string str)
{
return TokenPtr (new Token (RAW_STRING_LITERAL, locus, std::move (str)));
}
// Makes and returns a new TokenPtr of type INNER_DOC_COMMENT.
- static TokenPtr make_inner_doc_comment (location_t locus, std::string &&str)
+ static TokenPtr make_inner_doc_comment (location_t locus, std::string str)
{
return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str)));
}
// Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT.
- static TokenPtr make_outer_doc_comment (location_t locus, std::string &&str)
+ static TokenPtr make_outer_doc_comment (location_t locus, std::string str)
{
return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str)));
}
// Makes and returns a new TokenPtr of type LIFETIME.
- static TokenPtr make_lifetime (location_t locus, std::string &&str)
+ static TokenPtr make_lifetime (location_t locus, std::string str)
{
// return std::make_shared<Token> (LIFETIME, locus, str);
return TokenPtr (new Token (LIFETIME, locus, std::move (str)));
@@ -413,16 +410,18 @@ public:
void set_locus (location_t locus) { this->locus = locus; }
// Gets string description of the token.
- const std::string &
- get_str () const; /*{
-// FIXME: put in header again when fix null problem
-//gcc_assert(str != nullptr);
-if (str == nullptr) {
-error_at(get_locus(), "attempted to get string for '%s', which has no string.
-returning empty string instead.", get_token_description()); return "";
-}
-return *str;
-}*/
+ const std::string &get_str () const
+ {
+ if (token_id_is_keyword (token_id))
+ return token_id_keyword_string (token_id);
+
+ if (!should_have_str ())
+ rust_internal_error_at (
+ locus, "attempting to get string for %qs, which should have no string",
+ get_token_description ());
+
+ return str;
+ }
// Gets token's type hint info.
PrimitiveCoreType get_type_hint () const
@@ -464,14 +463,11 @@ return *str;
}
}
- /* Returns whether the token actually has a string (regardless of whether it
- * should or not). */
- bool has_str () const { return str != nullptr; }
-
// Returns whether the token should have a string.
bool should_have_str () const
{
- return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME;
+ return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME
+ || token_id == INNER_DOC_COMMENT || token_id == OUTER_DOC_COMMENT;
}
// Returns whether the token is a pure decimal int literal