aboutsummaryrefslogtreecommitdiff
path: root/gdb/rust-parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'gdb/rust-parse.c')
-rw-r--r--gdb/rust-parse.c70
1 files changed, 52 insertions, 18 deletions
diff --git a/gdb/rust-parse.c b/gdb/rust-parse.c
index 894f2e7..8be7d33 100644
--- a/gdb/rust-parse.c
+++ b/gdb/rust-parse.c
@@ -33,6 +33,12 @@
using namespace expr;
+#if WORDS_BIGENDIAN
+#define UTF32 "UTF-32BE"
+#else
+#define UTF32 "UTF-32LE"
+#endif
+
/* A regular expression for matching Rust numbers. This is split up
since it is very long and this gives us a way to comment the
sections. */
@@ -577,6 +583,35 @@ rust_parser::lex_escape (int is_byte)
return result;
}
+/* A helper for lex_character. Search forward for the closing single
+ quote, then convert the bytes from the host charset to UTF-32. */
+
+static uint32_t
+lex_multibyte_char (const char *text, int *len)
+{
+ /* Only look a maximum of 5 bytes for the closing quote. This is
+ the maximum for UTF-8. */
+ int quote;
+ gdb_assert (text[0] != '\'');
+ for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
+ ;
+ *len = quote;
+ /* The caller will issue an error. */
+ if (text[quote] == '\0')
+ return 0;
+
+ auto_obstack result;
+ convert_between_encodings (host_charset (), UTF32, (const gdb_byte *) text,
+ quote, 1, &result, translit_none);
+
+ int size = obstack_object_size (&result);
+ if (size > 4)
+ error (_("overlong character literal"));
+ uint32_t value;
+ memcpy (&value, obstack_finish (&result), size);
+ return value;
+}
+
/* Lex a character constant. */
int
@@ -592,13 +627,15 @@ rust_parser::lex_character ()
}
gdb_assert (pstate->lexptr[0] == '\'');
++pstate->lexptr;
- /* This should handle UTF-8 here. */
- if (pstate->lexptr[0] == '\\')
+ if (pstate->lexptr[0] == '\'')
+ error (_("empty character literal"));
+ else if (pstate->lexptr[0] == '\\')
value = lex_escape (is_byte);
else
{
- value = pstate->lexptr[0] & 0xff;
- ++pstate->lexptr;
+ int len;
+ value = lex_multibyte_char (&pstate->lexptr[0], &len);
+ pstate->lexptr += len;
}
if (pstate->lexptr[0] != '\'')
@@ -695,16 +732,9 @@ rust_parser::lex_string ()
if (is_byte)
obstack_1grow (&obstack, value);
else
- {
-#if WORDS_BIGENDIAN
-#define UTF32 "UTF-32BE"
-#else
-#define UTF32 "UTF-32LE"
-#endif
- convert_between_encodings (UTF32, "UTF-8", (gdb_byte *) &value,
- sizeof (value), sizeof (value),
- &obstack, translit_none);
- }
+ convert_between_encodings (UTF32, "UTF-8", (gdb_byte *) &value,
+ sizeof (value), sizeof (value),
+ &obstack, translit_none);
}
else if (pstate->lexptr[0] == '\0')
error (_("Unexpected EOF in string"));
@@ -746,7 +776,10 @@ rust_identifier_start_p (char c)
return ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| c == '_'
- || c == '$');
+ || c == '$'
+ /* Allow any non-ASCII character as an identifier. There
+ doesn't seem to be a need to be picky about this. */
+ || (c & 0x80) != 0);
}
/* Lex an identifier. */
@@ -772,13 +805,14 @@ rust_parser::lex_identifier ()
++pstate->lexptr;
- /* For the time being this doesn't handle Unicode rules. Non-ASCII
- identifiers are gated anyway. */
+ /* Allow any non-ASCII character here. This "handles" UTF-8 by
+ passing it through. */
while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
|| (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
|| pstate->lexptr[0] == '_'
|| (is_gdb_var && pstate->lexptr[0] == '$')
- || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))
+ || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
+ || (pstate->lexptr[0] & 0x80) != 0)
++pstate->lexptr;