diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/backend/rust-compile-expr.h | 43 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 88 | ||||
-rw-r--r-- | gcc/rust/typecheck/rust-hir-type-check-expr.h | 44 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/torture/byte_str.rs | 4 | ||||
-rw-r--r-- | gcc/testsuite/rust/compile/unicode_escape.rs | 60 |
5 files changed, 216 insertions, 23 deletions
diff --git a/gcc/rust/backend/rust-compile-expr.h b/gcc/rust/backend/rust-compile-expr.h index d0c0b74..eb245dc 100644 --- a/gcc/rust/backend/rust-compile-expr.h +++ b/gcc/rust/backend/rust-compile-expr.h @@ -304,8 +304,7 @@ public: } return; - case HIR::Literal::STRING: - case HIR::Literal::BYTE_STRING: { + case HIR::Literal::STRING: { auto base = ctx->get_backend ()->string_constant_expression ( literal_value->as_string ()); translated @@ -313,6 +312,46 @@ public: } return; + case HIR::Literal::BYTE_STRING: { + TyTy::BaseType *tyty = nullptr; + if (!ctx->get_tyctx ()->lookup_type ( + expr.get_mappings ().get_hirid (), &tyty)) + { + rust_fatal_error (expr.get_locus (), + "did not resolve type for this array expr"); + return; + } + + // the type here is &[ty; capacity] + rust_assert (tyty->get_kind () == TyTy::TypeKind::REF); + auto ref_tyty = static_cast<TyTy::ReferenceType *> (tyty); + auto base_tyty = ref_tyty->get_base (); + rust_assert (base_tyty->get_kind () == TyTy::TypeKind::ARRAY); + auto array_tyty = static_cast<TyTy::ArrayType *> (base_tyty); + + std::string value_str = expr.get_literal ()->as_string (); + std::vector<Bexpression *> vals; + std::vector<unsigned long> indexes; + for (size_t i = 0; i < value_str.size (); i++) + { + char b = value_str.at (i); + Bexpression *bb + = ctx->get_backend ()->char_constant_expression (b); + vals.push_back (bb); + indexes.push_back (i); + } + + Btype *array_type = TyTyResolveCompile::compile (ctx, array_tyty); + Bexpression *constructed + = ctx->get_backend ()->array_constructor_expression ( + array_type, indexes, vals, expr.get_locus ()); + + translated + = ctx->get_backend ()->address_expression (constructed, + expr.get_locus ()); + } + return; + default: rust_fatal_error (expr.get_locus (), "unknown literal"); return; diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index bbddea0..2b3c89b 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1273,6 +1273,8 @@ Lexer::parse_escape 
(char opening_char) rust_error_at (get_current_location (), "cannot have a unicode escape \\u in a byte %s", opening_char == '\'' ? "character" : "string"); + // Try to parse it anyway, just to skip it + parse_partial_unicode_escape (); return std::make_tuple (output_char, additional_length_offset, false); case '\r': case '\n': @@ -1461,16 +1463,34 @@ Lexer::parse_partial_unicode_escape () { skip_input (); current_char = peek_input (); - int additional_length_offset = 1; + int additional_length_offset = 0; - bool need_close_brace = false; - if (current_char == '{') + if (current_char != '{') { - need_close_brace = true; + rust_error_at (get_current_location (), + "unicode escape should start with %<{%>"); + /* Skip what should probably have been between brackets. */ + while (is_x_digit (current_char) || current_char == '_') + { + skip_input (); + current_char = peek_input (); + additional_length_offset++; + } + return std::make_pair (Codepoint (0), additional_length_offset); + } + skip_input (); + current_char = peek_input (); + additional_length_offset++; + + if (current_char == '_') + { + rust_error_at (get_current_location (), + "unicode escape cannot start with %<_%>"); skip_input (); current_char = peek_input (); additional_length_offset++; + // fallthrough and try to parse the rest anyway } // parse unicode escape - 1-6 hex digits @@ -1500,21 +1520,45 @@ Lexer::parse_partial_unicode_escape () current_char = peek_input (); } - // ensure closing brace if required - if (need_close_brace) + if (current_char == '}') { - if (current_char == '}') + skip_input (); + current_char = peek_input (); + additional_length_offset++; + } + else + { + // actually an error, but allow propagation anyway. Assume that + // wrong bracket, whitespace or single/double quotes are wrong + // termination, otherwise it is a wrong character, then skip to the actual + // terminator. 
+ if (current_char == '{' || is_whitespace (current_char) + || current_char == '\'' || current_char == '"') { - skip_input (); - current_char = peek_input (); - additional_length_offset++; + rust_error_at (get_current_location (), + "expected terminating %<}%> in unicode escape"); + return std::make_pair (Codepoint (0), additional_length_offset); } else { - // actually an error, but allow propagation anyway rust_error_at (get_current_location (), - "expected terminating %<}%> in unicode escape"); - // return false; + "invalid character %<%c%> in unicode escape", + current_char); + while (current_char != '}' && current_char != '{' + && !is_whitespace (current_char) && current_char != '\'' + && current_char != '"') + { + skip_input (); + current_char = peek_input (); + additional_length_offset++; + } + // Consume the actual closing bracket if found + if (current_char == '}') + { + skip_input (); + current_char = peek_input (); + additional_length_offset++; + } return std::make_pair (Codepoint (0), additional_length_offset); } } @@ -1530,10 +1574,22 @@ Lexer::parse_partial_unicode_escape () return std::make_pair (Codepoint (0), additional_length_offset); } - long hex_num = std::strtol (num_str.c_str (), nullptr, 16); + unsigned long hex_num = std::strtoul (num_str.c_str (), nullptr, 16); - // assert fits a uint32_t - gcc_assert (hex_num < 4294967296); + if (hex_num > 0xd7ff && hex_num < 0xe000) + { + rust_error_at ( + get_current_location (), + "unicode escape cannot be a surrogate value (D800 to DFFF)"); + return std::make_pair (Codepoint (0), additional_length_offset); + } + + if (hex_num > 0x10ffff) + { + rust_error_at (get_current_location (), + "unicode escape cannot be larger than 10FFFF"); + return std::make_pair (Codepoint (0), additional_length_offset); + } // return true; return std::make_pair (Codepoint (static_cast<uint32_t> (hex_num)), diff --git a/gcc/rust/typecheck/rust-hir-type-check-expr.h b/gcc/rust/typecheck/rust-hir-type-check-expr.h index 
fe8973a..28b9851 100644 --- a/gcc/rust/typecheck/rust-hir-type-check-expr.h +++ b/gcc/rust/typecheck/rust-hir-type-check-expr.h @@ -609,15 +609,49 @@ public: break; case HIR::Literal::LitType::BYTE_STRING: { - /* We just treat this as a string, but it really is an arraytype of - u8. It isn't in UTF-8, but really just a byte array. */ - TyTy::BaseType *base = nullptr; - auto ok = context->lookup_builtin ("str", &base); + /* This is an arraytype of u8 reference (&[u8;size]). It isn't in + UTF-8, but really just a byte array. Code to construct the array + reference copied from ArrayElemsValues and ArrayType. */ + TyTy::BaseType *u8; + auto ok = context->lookup_builtin ("u8", &u8); rust_assert (ok); + auto crate_num = mappings->get_current_crate (); + Analysis::NodeMapping capacity_mapping (crate_num, UNKNOWN_NODEID, + mappings->get_next_hir_id ( + crate_num), + UNKNOWN_LOCAL_DEFID); + + /* Capacity is the size of the string (number of chars). + It is a constant, but we fold it to get a Bexpression. 
*/ + std::string capacity_str + = std::to_string (expr.get_literal ()->as_string ().size ()); + HIR::LiteralExpr literal_capacity (capacity_mapping, capacity_str, + HIR::Literal::LitType::INT, + PrimitiveCoreType::CORETYPE_USIZE, + expr.get_locus ()); + + // mark the type for this implicit node + context->insert_type (capacity_mapping, + new TyTy::USizeType ( + capacity_mapping.get_hirid ())); + + Bexpression *capacity + = ConstFold::ConstFoldExpr::fold (&literal_capacity); + + Analysis::NodeMapping array_mapping (crate_num, UNKNOWN_NODEID, + mappings->get_next_hir_id ( + crate_num), + UNKNOWN_LOCAL_DEFID); + + TyTy::ArrayType *array + = new TyTy::ArrayType (array_mapping.get_hirid (), capacity, + TyTy::TyVar (u8->get_ref ())); + context->insert_type (array_mapping, array); + infered = new TyTy::ReferenceType (expr.get_mappings ().get_hirid (), - TyTy::TyVar (base->get_ref ()), false); + TyTy::TyVar (array->get_ref ()), false); } break; diff --git a/gcc/testsuite/rust/compile/torture/byte_str.rs b/gcc/testsuite/rust/compile/torture/byte_str.rs new file mode 100644 index 0000000..28934d2 --- /dev/null +++ b/gcc/testsuite/rust/compile/torture/byte_str.rs @@ -0,0 +1,4 @@ +pub fn main() { + let a: &[u8; 4]; + a = b"test"; +} diff --git a/gcc/testsuite/rust/compile/unicode_escape.rs b/gcc/testsuite/rust/compile/unicode_escape.rs new file mode 100644 index 0000000..39b91d8 --- /dev/null +++ b/gcc/testsuite/rust/compile/unicode_escape.rs @@ -0,0 +1,60 @@ +fn main () +{ + // Braces are required + let _cbl = '\u013'; // { dg-error "unicode escape" } + let _sbl = "\u013"; //{ dg-error "unicode escape" } + + // One to six hex digits + let _c0 = '\u{}'; // { dg-error "unicode escape" } + let _c1 = '\u{0}'; + let _c2 = '\u{00}'; + let _c3 = '\u{000}'; + let _c4 = '\u{0000}'; + let _c5 = '\u{00000}'; + let _c6 = '\u{000000}'; + let _c7 = '\u{0000000}'; // { dg-error "unicode escape" } + + let _s0 = "\u{}"; // { dg-error "unicode escape" } + let _s1 = "\u{0}"; + let _s2 = 
"\u{00}"; + let _s3 = "\u{000}"; + let _s4 = "\u{0000}"; + let _s5 = "\u{00000}"; + let _s6 = "\u{000000}"; + let _s7 = "\u{0000000}"; // { dg-error "unicode escape" } + + // Underscores OK except for start + let _c_ = '\u{00___01__0_1_}'; + let _s_ = "\u{00___01__0_1_}"; + let _c__ = '\u{_00__01__0_}'; // { dg-error "unicode escape" } + let _s__ = "\u{_00__01__0_}"; // { dg-error "unicode escape" } + + // Must be hex chars + let _chex = '\u{hex}'; // { dg-error "unicode escape" } + let _shex = '\u{hex}'; // { dg-error "unicode escape" } + + // Only valid from 0x0 to 0xD7FF and from 0xE000 to 0x10FFFF + let _cd7ff = '\u{D7FF}'; + let _sd7ff = "\u{D7FF}"; + let _cd800 = '\u{D800}'; // { dg-error "unicode escape" } + let _sd800 = "\u{D800}"; // { dg-error "unicode escape" } + + let _cdfff = '\u{DFFF}'; // { dg-error "unicode escape" } + let _sdfff = "\u{DFFF}"; // { dg-error "unicode escape" } + let _ce000 = '\u{E000}'; + let _se000 = "\u{E000}"; + + let _clast = '\u{10FFFF}'; + let _slast = "\u{10FFFF}"; + let _clast1 = '\u{110000}'; // { dg-error "unicode escape" } + let _slast1 = "\u{110000}"; // { dg-error "unicode escape" } + + let _cffffff = '\u{FFFFFF}'; // { dg-error "unicode escape" } + let _sffffff = "\u{FFFFFF}"; // { dg-error "unicode escape" } + + // unicode escapes cannot be used in bytes or byte strings. + // Except in raw byte strings (where they aren't escapes). + let _bc = b'\u{000A}'; // { dg-error "unicode escape" } + let _bs = b"\u{000A}"; // { dg-error "unicode escape" } + let _rbs = br"\u{000A}"; +} |