5 files changed, 216 insertions, 23 deletions
diff --git a/gcc/rust/backend/rust-compile-expr.h b/gcc/rust/backend/rust-compile-expr.h
index d0c0b74..eb245dc 100644
--- a/gcc/rust/backend/rust-compile-expr.h
+++ b/gcc/rust/backend/rust-compile-expr.h
@@ -304,8 +304,7 @@ public:
 	}
 	return;
 
-      case HIR::Literal::STRING:
-	case HIR::Literal::BYTE_STRING: {
+	case HIR::Literal::STRING: {
 	  auto base = ctx->get_backend ()->string_constant_expression (
 	    literal_value->as_string ());
 	  translated
@@ -313,6 +312,46 @@ public:
 	}
 	return;
 
+	case HIR::Literal::BYTE_STRING: {
+	  TyTy::BaseType *tyty = nullptr;
+	  if (!ctx->get_tyctx ()->lookup_type (
+		expr.get_mappings ().get_hirid (), &tyty))
+	    {
+	      rust_fatal_error (expr.get_locus (),
+				"did not resolve type for this array expr");
+	      return;
+	    }
+
+	  // the type here is &[ty; capacity]
+	  rust_assert (tyty->get_kind () == TyTy::TypeKind::REF);
+	  auto ref_tyty = static_cast<TyTy::ReferenceType *> (tyty);
+	  auto base_tyty = ref_tyty->get_base ();
+	  rust_assert (base_tyty->get_kind () == TyTy::TypeKind::ARRAY);
+	  auto array_tyty = static_cast<TyTy::ArrayType *> (base_tyty);
+
+	  std::string value_str = expr.get_literal ()->as_string ();
+	  std::vector<Bexpression *> vals;
+	  std::vector<unsigned long> indexes;
+	  for (size_t i = 0; i < value_str.size (); i++)
+	    {
+	      char b = value_str.at (i);
+	      Bexpression *bb
+		= ctx->get_backend ()->char_constant_expression (b);
+	      vals.push_back (bb);
+	      indexes.push_back (i);
+	    }
+
+	  Btype *array_type = TyTyResolveCompile::compile (ctx, array_tyty);
+	  Bexpression *constructed
+	    = ctx->get_backend ()->array_constructor_expression (
+	      array_type, indexes, vals, expr.get_locus ());
+
+	  translated
+	    = ctx->get_backend ()->address_expression (constructed,
+						       expr.get_locus ());
+	}
+	return;
+
       default:
 	rust_fatal_error (expr.get_locus (), "unknown literal");
 	return;
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index bbddea0..2b3c89b 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1273,6 +1273,8 @@ Lexer::parse_escape (char opening_char)
       rust_error_at (get_current_location (),
 		     "cannot have a unicode escape \\u in a byte %s",
 		     opening_char == '\'' ? "character" : "string");
+      // Try to parse it anyway, just to skip it
+      parse_partial_unicode_escape ();
       return std::make_tuple (output_char, additional_length_offset, false);
     case '\r':
     case '\n':
@@ -1461,16 +1463,34 @@ Lexer::parse_partial_unicode_escape ()
 {
   skip_input ();
   current_char = peek_input ();
-  int additional_length_offset = 1;
+  int additional_length_offset = 0;
 
-  bool need_close_brace = false;
-  if (current_char == '{')
+  if (current_char != '{')
     {
-      need_close_brace = true;
+      rust_error_at (get_current_location (),
+		     "unicode escape should start with %<{%>");
+      /* Skip what should probably have been between brackets.  */
+      while (is_x_digit (current_char) || current_char == '_')
+	{
+	  skip_input ();
+	  current_char = peek_input ();
+	  additional_length_offset++;
+	}
+      return std::make_pair (Codepoint (0), additional_length_offset);
+    }
 
+  skip_input ();
+  current_char = peek_input ();
+  additional_length_offset++;
+
+  if (current_char == '_')
+    {
+      rust_error_at (get_current_location (),
+		     "unicode escape cannot start with %<_%>");
       skip_input ();
       current_char = peek_input ();
       additional_length_offset++;
+      // fallthrough and try to parse the rest anyway
     }
 
   // parse unicode escape - 1-6 hex digits
@@ -1500,21 +1520,45 @@ Lexer::parse_partial_unicode_escape ()
       current_char = peek_input ();
     }
 
-  // ensure closing brace if required
-  if (need_close_brace)
+  if (current_char == '}')
     {
-      if (current_char == '}')
+      skip_input ();
+      current_char = peek_input ();
+      additional_length_offset++;
+    }
+  else
+    {
+      // Actually an error, but allow propagation anyway.  Assume that a
+      // wrong bracket, whitespace or single/double quotes are a wrong
+      // termination; otherwise it is a wrong character, so skip to the
+      // actual terminator.
+      if (current_char == '{' || is_whitespace (current_char)
+	  || current_char == '\'' || current_char == '"')
 	{
-	  skip_input ();
-	  current_char = peek_input ();
-	  additional_length_offset++;
+	  rust_error_at (get_current_location (),
+			 "expected terminating %<}%> in unicode escape");
+	  return std::make_pair (Codepoint (0), additional_length_offset);
 	}
       else
 	{
-	  // actually an error, but allow propagation anyway
 	  rust_error_at (get_current_location (),
-			 "expected terminating %<}%> in unicode escape");
-	  // return false;
+			 "invalid character %<%c%> in unicode escape",
+			 current_char);
+	  while (current_char != '}' && current_char != '{'
+		 && !is_whitespace (current_char) && current_char != '\''
+		 && current_char != '"')
+	    {
+	      skip_input ();
+	      current_char = peek_input ();
+	      additional_length_offset++;
+	    }
+	  // Consume the actual closing bracket if found
+	  if (current_char == '}')
+	    {
+	      skip_input ();
+	      current_char = peek_input ();
+	      additional_length_offset++;
+	    }
 	  return std::make_pair (Codepoint (0), additional_length_offset);
 	}
     }
@@ -1530,10 +1574,22 @@ Lexer::parse_partial_unicode_escape ()
       return std::make_pair (Codepoint (0), additional_length_offset);
     }
 
-  long hex_num = std::strtol (num_str.c_str (), nullptr, 16);
+  unsigned long hex_num = std::strtoul (num_str.c_str (), nullptr, 16);
 
-  // assert fits a uint32_t
-  gcc_assert (hex_num < 4294967296);
+  if (hex_num > 0xd7ff && hex_num < 0xe000)
+    {
+      rust_error_at (
+	get_current_location (),
+	"unicode escape cannot be a surrogate value (D800 to DFFF)");
+      return std::make_pair (Codepoint (0), additional_length_offset);
+    }
+
+  if (hex_num > 0x10ffff)
+    {
+      rust_error_at (get_current_location (),
+		     "unicode escape cannot be larger than 10FFFF");
+      return std::make_pair (Codepoint (0), additional_length_offset);
+    }
 
   // return true;
   return std::make_pair (Codepoint (static_cast<uint32_t> (hex_num)),
diff --git a/gcc/rust/typecheck/rust-hir-type-check-expr.h b/gcc/rust/typecheck/rust-hir-type-check-expr.h
index fe8973a..28b9851 100644
--- a/gcc/rust/typecheck/rust-hir-type-check-expr.h
+++ b/gcc/rust/typecheck/rust-hir-type-check-expr.h
@@ -609,15 +609,49 @@ public:
 	break;
 
 	case HIR::Literal::LitType::BYTE_STRING: {
-	  /* We just treat this as a string, but it really is an arraytype of
-	     u8. It isn't in UTF-8, but really just a byte array.  */
-	  TyTy::BaseType *base = nullptr;
-	  auto ok = context->lookup_builtin ("str", &base);
+	  /* This is an arraytype of u8 reference (&[u8;size]). It isn't in
+	     UTF-8, but really just a byte array. Code to construct the array
+	     reference copied from ArrayElemsValues and ArrayType. */
+	  TyTy::BaseType *u8;
+	  auto ok = context->lookup_builtin ("u8", &u8);
 	  rust_assert (ok);
 
+	  auto crate_num = mappings->get_current_crate ();
+	  Analysis::NodeMapping capacity_mapping (crate_num, UNKNOWN_NODEID,
+						  mappings->get_next_hir_id (
+						    crate_num),
+						  UNKNOWN_LOCAL_DEFID);
+
+	  /* Capacity is the size of the string (number of chars).
+	     It is a constant, but fold it anyway to get a Bexpression.  */
+	  std::string capacity_str
+	    = std::to_string (expr.get_literal ()->as_string ().size ());
+	  HIR::LiteralExpr literal_capacity (capacity_mapping, capacity_str,
+					     HIR::Literal::LitType::INT,
+					     PrimitiveCoreType::CORETYPE_USIZE,
+					     expr.get_locus ());
+
+	  // mark the type for this implicit node
+	  context->insert_type (capacity_mapping,
+				new TyTy::USizeType (
+				  capacity_mapping.get_hirid ()));
+
+	  Bexpression *capacity
+	    = ConstFold::ConstFoldExpr::fold (&literal_capacity);
+
+	  Analysis::NodeMapping array_mapping (crate_num, UNKNOWN_NODEID,
+					       mappings->get_next_hir_id (
+						 crate_num),
+					       UNKNOWN_LOCAL_DEFID);
+
+	  TyTy::ArrayType *array
+	    = new TyTy::ArrayType (array_mapping.get_hirid (), capacity,
+				   TyTy::TyVar (u8->get_ref ()));
+	  context->insert_type (array_mapping, array);
+
 	  infered
 	    = new TyTy::ReferenceType (expr.get_mappings ().get_hirid (),
-				       TyTy::TyVar (base->get_ref ()), false);
+				       TyTy::TyVar (array->get_ref ()), false);
 	}
 	break;
 
diff --git a/gcc/testsuite/rust/compile/torture/byte_str.rs b/gcc/testsuite/rust/compile/torture/byte_str.rs
new file mode 100644
index 0000000..28934d2
--- /dev/null
+++ b/gcc/testsuite/rust/compile/torture/byte_str.rs
@@ -0,0 +1,4 @@
+pub fn main() {
+    let a: &[u8; 4]; // a byte string literal is typed as a reference to a u8 array
+    a = b"test"; // four bytes, so the literal must match &[u8; 4]
+}
diff --git a/gcc/testsuite/rust/compile/unicode_escape.rs b/gcc/testsuite/rust/compile/unicode_escape.rs
new file mode 100644
index 0000000..39b91d8
--- /dev/null
+++ b/gcc/testsuite/rust/compile/unicode_escape.rs
@@ -0,0 +1,60 @@
+fn main ()
+{
+  // Braces are required
+  let _cbl = '\u013'; // { dg-error "unicode escape" }
+  let _sbl = "\u013"; // { dg-error "unicode escape" }
+
+  // One to six hex digits
+  let _c0 = '\u{}'; // { dg-error "unicode escape" }
+  let _c1 = '\u{0}';
+  let _c2 = '\u{00}';
+  let _c3 = '\u{000}';
+  let _c4 = '\u{0000}';
+  let _c5 = '\u{00000}';
+  let _c6 = '\u{000000}';
+  let _c7 = '\u{0000000}'; // { dg-error "unicode escape" }
+
+  let _s0 = "\u{}"; // { dg-error "unicode escape" }
+  let _s1 = "\u{0}";
+  let _s2 = "\u{00}";
+  let _s3 = "\u{000}";
+  let _s4 = "\u{0000}";
+  let _s5 = "\u{00000}";
+  let _s6 = "\u{000000}";
+  let _s7 = "\u{0000000}"; // { dg-error "unicode escape" }
+
+  // Underscores OK except for start
+  let _c_ = '\u{00___01__0_1_}';
+  let _s_ = "\u{00___01__0_1_}";
+  let _c__ = '\u{_00__01__0_}'; // { dg-error "unicode escape" }
+  let _s__ = "\u{_00__01__0_}"; // { dg-error "unicode escape" }
+
+  // Must be hex chars (checked for both char and string literals)
+  let _chex = '\u{hex}';  // { dg-error "unicode escape" }
+  let _shex = "\u{hex}";  // { dg-error "unicode escape" }
+
+  // Only valid from 0x0 to 0xD7FF and from 0xE000 to 0x10FFFF
+  let _cd7ff = '\u{D7FF}';
+  let _sd7ff = "\u{D7FF}";
+  let _cd800 = '\u{D800}'; // { dg-error "unicode escape" }
+  let _sd800 = "\u{D800}"; // { dg-error "unicode escape" }
+
+  let _cdfff = '\u{DFFF}'; // { dg-error "unicode escape" }
+  let _sdfff = "\u{DFFF}"; // { dg-error "unicode escape" }
+  let _ce000 = '\u{E000}';
+  let _se000 = "\u{E000}";
+
+  let _clast = '\u{10FFFF}';
+  let _slast = "\u{10FFFF}";
+  let _clast1 = '\u{110000}'; // { dg-error "unicode escape" }
+  let _slast1 = "\u{110000}"; // { dg-error "unicode escape" }
+
+  let _cffffff = '\u{FFFFFF}'; // { dg-error "unicode escape" }
+  let _sffffff = "\u{FFFFFF}"; // { dg-error "unicode escape" }
+
+  // unicode escapes cannot be used in bytes or byte strings.
+  // Except in raw byte strings (where they aren't escapes).
+  let _bc = b'\u{000A}'; // { dg-error "unicode escape" }
+  let _bs = b"\u{000A}"; // { dg-error "unicode escape" }
+  let _rbs = br"\u{000A}";
+}