4 files changed, 92 insertions, 109 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 8490638..a99b5ed 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -21,7 +21,7 @@
 #include "rust-lex.h"
 #include "rust-diagnostics.h"
 #include "rust-linemap.h"
-#include "rust-session-manager.h"
+#include "rust-edition.h"
 #include "safe-ctype.h"
 #include "cpplib.h"
 #include "rust-keyword-values.h"
@@ -236,10 +236,11 @@ Lexer::dump_and_skip (int n)
 
 	  out << "<id=";
 	  out << tok->token_id_to_str ();
-	  out << (tok->has_str () ? (std::string (", text=") + tok->get_str ()
-				     + std::string (", typehint=")
-				     + std::string (tok->get_type_hint_str ()))
-				  : "")
+	  out << (tok->should_have_str ()
+		    ? (std::string (", text=") + tok->get_str ()
+		       + std::string (", typehint=")
+		       + std::string (tok->get_type_hint_str ()))
+		    : "")
 	      << " ";
 	  out << Linemap::location_to_string (loc) << '\n';
 	}
@@ -277,9 +278,7 @@ Lexer::classify_keyword (const std::string &str)
   // https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
 
   // `try` is not a reserved keyword before 2018
-  if (Session::get_instance ().options.get_edition ()
-	== CompileOptions::Edition::E2015
-      && id == TRY)
+  if (get_rust_edition () == Edition::E2015 && id == TRY)
     return IDENTIFIER;
 
   return id;
@@ -1319,7 +1318,8 @@ Lexer::parse_escape (char opening_char)
 
   switch (current_char.value)
     {
-      case 'x': {
+    case 'x':
+      {
 	auto hex_escape_pair = parse_partial_hex_escape ();
 	long hexLong = hex_escape_pair.first;
 	additional_length_offset += hex_escape_pair.second;
@@ -1402,7 +1402,8 @@ Lexer::parse_utf8_escape ()
 
   switch (current_char.value)
     {
-      case 'x': {
+    case 'x':
+      {
 	auto hex_escape_pair = parse_partial_hex_escape ();
 	long hexLong = hex_escape_pair.first;
 	additional_length_offset += hex_escape_pair.second;
@@ -1440,7 +1441,8 @@ Lexer::parse_utf8_escape ()
     case '"':
       output_char = '"';
       break;
-      case 'u': {
+    case 'u':
+      {
 	auto unicode_escape_pair = parse_partial_unicode_escape ();
 	output_char = unicode_escape_pair.first;
 	additional_length_offset += unicode_escape_pair.second;
@@ -1896,6 +1898,11 @@ Lexer::parse_raw_byte_string (location_t loc)
 	      break;
 	    }
 	}
+      else if (current_char.is_eof ())
+	{
+	  rust_error_at (string_begin_locus, "unended raw byte string literal");
+	  return Token::make (END_OF_FILE, get_current_location ());
+	}
       else if (current_char.value > 127)
 	{
 	  rust_error_at (get_current_location (),
@@ -1903,11 +1910,6 @@ Lexer::parse_raw_byte_string (location_t loc)
 			 current_char.as_string ().c_str ());
 	  current_char = 0;
 	}
-      else if (current_char.is_eof ())
-	{
-	  rust_error_at (string_begin_locus, "unended raw byte string literal");
-	  return Token::make (END_OF_FILE, get_current_location ());
-	}
 
       length++;
       current_column++;
@@ -2637,37 +2639,37 @@ void
 rust_input_source_test ()
 {
   // ASCII
-  std::string src = u8"_abcde\tXYZ\v\f";
-  std::vector<uint32_t> expected
-    = {'_', 'a', 'b', 'c', 'd', 'e', '\t', 'X', 'Y', 'Z', '\v', '\f'};
+  std::string src = (const char *) u8"_abcde\tXYZ\v\f";
+  std::vector<uint32_t> expected = {u'_',  u'a', u'b', u'c', u'd',  u'e',
+				    u'\t', u'X', u'Y', u'Z', u'\v', u'\f'};
   test_buffer_input_source (src, expected);
 
   // BOM
-  src = u8"\xef\xbb\xbfOK";
-  expected = {'O', 'K'};
+  src = (const char *) u8"\xef\xbb\xbfOK";
+  expected = {u'O', u'K'};
   test_buffer_input_source (src, expected);
 
   // Russian
-  src = u8"приве́т";
-  expected = {L'п',
-	      L'р',
-	      L'и',
-	      L'в',
+  src = (const char *) u8"приве́т";
+  expected = {u'п',
+	      u'р',
+	      u'и',
+	      u'в',
 	      0x0435 /* CYRILLIC SMALL LETTER IE е */,
 	      0x301 /* COMBINING ACUTE ACCENT ́ */,
-	      L'т'};
+	      u'т'};
   test_buffer_input_source (src, expected);
 
-  src = u8"❤️🦀";
+  src = (const char *) u8"❤️🦀";
   expected = {0x2764 /* HEAVY BLACK HEART */,
-	      0xfe0f /* VARIATION SELECTOR-16 */, L'🦀'};
+	      0xfe0f /* VARIATION SELECTOR-16 */, U'🦀'};
   test_buffer_input_source (src, expected);
 
-  src = u8"こんにちは";
-  expected = {L'こ', L'ん', L'に', L'ち', L'は'};
+  src = (const char *) u8"こんにちは";
+  expected = {u'こ', u'ん', u'に', u'ち', u'は'};
   test_file_input_source (src, expected);
 
-  src = u8"👮‍♂👩‍⚕";
+  src = (const char *) u8"👮‍♂👩‍⚕";
   expected
     = {0x1f46e /* POLICE OFFICER */,   0x200d /* ZERO WIDTH JOINER */,
        0x2642 /* MALE SIGN */,	       0x1f469 /* WOMAN */,
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 10293e0..383ffac 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -263,8 +263,7 @@ private:
 #if CHECKING_P
 
 namespace selftest {
-void
-rust_input_source_test ();
+void rust_input_source_test ();
 
 } // namespace selftest
 
diff --git a/gcc/rust/lex/rust-token.cc b/gcc/rust/lex/rust-token.cc
index 8493889..43682af 100644
--- a/gcc/rust/lex/rust-token.cc
+++ b/gcc/rust/lex/rust-token.cc
@@ -20,6 +20,7 @@
 #include "rust-token.h"
 #include "rust-diagnostics.h"
 #include "rust-unicode.h"
+#include "rust-ast.h"
 
 namespace Rust {
 // Hackily defined way to get token description for enum value using x-macros
@@ -88,7 +89,8 @@ token_id_keyword_string (TokenId id)
   switch (id)
     {
 #define RS_TOKEN_KEYWORD_2015(id, str_ptr)                                     \
-    case id: {                                                                 \
+  case id:                                                                     \
+    {                                                                          \
       static const std::string str (str_ptr);                                  \
       return str;                                                              \
     }                                                                          \
@@ -176,29 +178,6 @@ nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str)
     return str;
 }
 
-const std::string &
-Token::get_str () const
-{
-  if (token_id_is_keyword (token_id))
-    return token_id_keyword_string (token_id);
-
-  // FIXME: attempt to return null again
-  // gcc_assert(str != NULL);
-
-  // HACK: allow referencing an empty string
-  static const std::string empty = "";
-
-  if (str == NULL)
-    {
-      rust_error_at (get_locus (),
-		     "attempted to get string for %qs, which has no string. "
-		     "returning empty string instead",
-		     get_token_description ());
-      return empty;
-    }
-  return *str;
-}
-
 namespace {
 enum class Context
 {
@@ -234,6 +213,13 @@ escape_special_chars (const std::string &source, Context ctx)
 
 } // namespace
 
+TokenPtr
+Token::make_identifier (const Identifier &ident)
+{
+  std::string str = ident;
+  return make_identifier (ident.get_locus (), std::move (str));
+}
+
 std::string
 Token::as_string () const
 {
diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h
index aa2f308..71a7503 100644
--- a/gcc/rust/lex/rust-token.h
+++ b/gcc/rust/lex/rust-token.h
@@ -21,10 +21,14 @@
 
 #include "rust-system.h"
 #include "rust-linemap.h"
-#include "rust-make-unique.h"
 #include "rust-unicode.h"
+#include "rust-diagnostics.h"
 
 namespace Rust {
+
+// used by Rust::Token::make_identifier
+class Identifier;
+
 // "Primitive core types" in Rust - the different int and float types, as well
 // as some others
 enum PrimitiveCoreType
@@ -222,25 +226,20 @@ typedef std::shared_ptr<Token> TokenPtr;
 typedef std::shared_ptr<const Token> const_TokenPtr;
 
 // Hackily defined way to get token description for enum value using x-macros
-const char *
-get_token_description (TokenId id);
+const char *get_token_description (TokenId id);
 /* Hackily defined way to get token description as a string for enum value using
  * x-macros */
-const char *
-token_id_to_str (TokenId id);
+const char *token_id_to_str (TokenId id);
 /* checks if a token is a keyword */
-bool
-token_id_is_keyword (TokenId id);
+bool token_id_is_keyword (TokenId id);
 /* gets the string associated with a keyword */
-const std::string &
-token_id_keyword_string (TokenId id);
+const std::string &token_id_keyword_string (TokenId id);
 // Get type hint description as a string.
-const char *
-get_type_hint_string (PrimitiveCoreType type);
+const char *get_type_hint_string (PrimitiveCoreType type);
 
 /* Normalize string if a token is a identifier */
-std::string
-nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str);
+std::string nfc_normalize_token_string (location_t loc, TokenId id,
+					const std::string &str);
 
 // Represents a single token. Create using factory static methods.
 class Token
@@ -251,7 +250,7 @@ private:
   // Token location.
   location_t locus;
   // Associated text (if any) of token.
-  std::unique_ptr<std::string> str;
+  std::string str;
-  // TODO: maybe remove issues and just store std::string as value?
+  // Stored by value; left empty for tokens that carry no text.
   /* Type hint for token based on lexer data (e.g. type suffix). Does not exist
    * for most tokens. */
@@ -259,23 +258,21 @@ private:
 
-  // Token constructor from token id and location. Has a null string.
+  // Token constructor from token id and location. Has an empty string.
   Token (TokenId token_id, location_t location)
-    : token_id (token_id), locus (location), str (nullptr),
-      type_hint (CORETYPE_UNKNOWN)
+    : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
   {}
 
   // Token constructor from token id, location, and a string.
-  Token (TokenId token_id, location_t location, std::string &&paramStr)
+  Token (TokenId token_id, location_t location, std::string paramStr)
     : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
   {
     // Normalize identifier tokens
-    str = Rust::make_unique<std::string> (
-      nfc_normalize_token_string (location, token_id, paramStr));
+    str = nfc_normalize_token_string (location, token_id, paramStr);
   }
 
   // Token constructor from token id, location, and a char.
   Token (TokenId token_id, location_t location, char paramChar)
-    : token_id (token_id), locus (location),
-      str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN)
+    : token_id (token_id), locus (location), str (1, paramChar),
+      type_hint (CORETYPE_UNKNOWN)
   {
     // Do not need to normalize 1byte char
   }
@@ -285,19 +282,17 @@ private:
     : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN)
   {
     // Normalize identifier tokens
-    str = Rust::make_unique<std::string> (
-      nfc_normalize_token_string (location, token_id,
-				  paramCodepoint.as_string ()));
+    str = nfc_normalize_token_string (location, token_id,
+				      paramCodepoint.as_string ());
   }
 
   // Token constructor from token id, location, a string, and type hint.
-  Token (TokenId token_id, location_t location, std::string &&paramStr,
+  Token (TokenId token_id, location_t location, std::string paramStr,
 	 PrimitiveCoreType parType)
     : token_id (token_id), locus (location), type_hint (parType)
   {
     // Normalize identifier tokens
-    str = Rust::make_unique<std::string> (
-      nfc_normalize_token_string (location, token_id, paramStr));
+    str = nfc_normalize_token_string (location, token_id, paramStr);
   }
 
 public:
@@ -324,14 +319,16 @@ public:
   }
 
   // Makes and returns a new TokenPtr of type IDENTIFIER.
-  static TokenPtr make_identifier (location_t locus, std::string &&str)
+  static TokenPtr make_identifier (location_t locus, std::string str)
   {
     // return std::make_shared<Token> (IDENTIFIER, locus, str);
     return TokenPtr (new Token (IDENTIFIER, locus, std::move (str)));
   }
 
+  static TokenPtr make_identifier (const Identifier &ident);
+
   // Makes and returns a new TokenPtr of type INT_LITERAL.
-  static TokenPtr make_int (location_t locus, std::string &&str,
+  static TokenPtr make_int (location_t locus, std::string str,
 			    PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
   {
     // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint);
@@ -340,7 +337,7 @@ public:
   }
 
   // Makes and returns a new TokenPtr of type FLOAT_LITERAL.
-  static TokenPtr make_float (location_t locus, std::string &&str,
+  static TokenPtr make_float (location_t locus, std::string str,
 			      PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
   {
     // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint);
@@ -349,7 +346,7 @@ public:
   }
 
   // Makes and returns a new TokenPtr of type STRING_LITERAL.
-  static TokenPtr make_string (location_t locus, std::string &&str)
+  static TokenPtr make_string (location_t locus, std::string str)
   {
     // return std::make_shared<Token> (STRING_LITERAL, locus, str,
     // CORETYPE_STR);
@@ -372,32 +369,32 @@ public:
   }
 
   // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix).
-  static TokenPtr make_byte_string (location_t locus, std::string &&str)
+  static TokenPtr make_byte_string (location_t locus, std::string str)
   {
     // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str);
     return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str)));
   }
 
   // Makes and returns a new TokenPtr of type RAW_STRING_LITERAL.
-  static TokenPtr make_raw_string (location_t locus, std::string &&str)
+  static TokenPtr make_raw_string (location_t locus, std::string str)
   {
     return TokenPtr (new Token (RAW_STRING_LITERAL, locus, std::move (str)));
   }
 
   // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT.
-  static TokenPtr make_inner_doc_comment (location_t locus, std::string &&str)
+  static TokenPtr make_inner_doc_comment (location_t locus, std::string str)
   {
     return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str)));
   }
 
   // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT.
-  static TokenPtr make_outer_doc_comment (location_t locus, std::string &&str)
+  static TokenPtr make_outer_doc_comment (location_t locus, std::string str)
   {
     return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str)));
   }
 
   // Makes and returns a new TokenPtr of type LIFETIME.
-  static TokenPtr make_lifetime (location_t locus, std::string &&str)
+  static TokenPtr make_lifetime (location_t locus, std::string str)
   {
     // return std::make_shared<Token> (LIFETIME, locus, str);
     return TokenPtr (new Token (LIFETIME, locus, std::move (str)));
@@ -413,16 +410,18 @@ public:
   void set_locus (location_t locus) { this->locus = locus; }
 
   // Gets string description of the token.
-  const std::string &
-  get_str () const; /*{
-// FIXME: put in header again when fix null problem
-//gcc_assert(str != nullptr);
-if (str == nullptr) {
-error_at(get_locus(), "attempted to get string for '%s', which has no string.
-returning empty string instead.", get_token_description()); return "";
-}
-return *str;
-}*/
+  const std::string &get_str () const
+  {
+    if (token_id_is_keyword (token_id))
+      return token_id_keyword_string (token_id);
+
+    if (!should_have_str ())
+      rust_internal_error_at (
+	locus, "attempting to get string for %qs, which should have no string",
+	get_token_description ());
+
+    return str;
+  }
 
   // Gets token's type hint info.
   PrimitiveCoreType get_type_hint () const
@@ -464,14 +463,11 @@ return *str;
       }
   }
 
-  /* Returns whether the token actually has a string (regardless of whether it
-   * should or not). */
-  bool has_str () const { return str != nullptr; }
-
   // Returns whether the token should have a string.
   bool should_have_str () const
   {
-    return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME;
+    return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME
+	   || token_id == INNER_DOC_COMMENT || token_id == OUTER_DOC_COMMENT;
   }
 
   // Returns whether the token is a pure decimal int literal