Handle multi-byte bracket sequences in Ada lexer

As noted in an earlier patch, the Ada lexer does not handle multi-byte bracket sequences. This patch adds support for them in character literals. gdb does not generally seem to handle the Ada wide string types, so for the time being these continue to be excluded -- but an explicit error is added to make this clearer.
author: Tom Tromey <tromey@adacore.com> 2022-01-26 07:11:18 -0700
committer: Tom Tromey <tromey@adacore.com> 2022-02-28 10:49:29 -0700
commit: c9f66f0005000492739dd063ea2949045bf70bc6 (patch)
tree: 7298f37b0e63711abc336fa0695d80a67d5c9144
parent: a7041de85a0cc43b86989eb697cef7a6cecdbdb7 (diff)
download: gdb-c9f66f0005000492739dd063ea2949045bf70bc6.zip
gdb-c9f66f0005000492739dd063ea2949045bf70bc6.tar.gz
gdb-c9f66f0005000492739dd063ea2949045bf70bc6.tar.bz2
6 files changed, 52 insertions, 21 deletions
diff --git a/gdb/ada-exp.y b/gdb/ada-exp.y
index 916b8ef..d3fce8d 100644
--- a/gdb/ada-exp.y
+++ b/gdb/ada-exp.y
@@ -98,7 +98,7 @@ static struct type *type_long_long (struct parser_state *);
 
 static struct type *type_long_double (struct parser_state *);
 
-static struct type *type_char (struct parser_state *);
+static struct type *type_for_char (struct parser_state *, ULONGEST);
 
 static struct type *type_boolean (struct parser_state *);
 
@@ -1727,10 +1727,18 @@ type_long_double (struct parser_state *par_state)
 }
 
 static struct type *
-type_char (struct parser_state *par_state)
+type_for_char (struct parser_state *par_state, ULONGEST value)
 {
-  return language_string_char_type (par_state->language (),
-				    par_state->gdbarch ());
+  if (value <= 0xff)
+    return language_string_char_type (par_state->language (),
+				      par_state->gdbarch ());
+  else if (value <= 0xffff)
+    return language_lookup_primitive_type (par_state->language (),
+					   par_state->gdbarch (),
+					   "wide_character");
+  return language_lookup_primitive_type (par_state->language (),
+					 par_state->gdbarch (),
+					 "wide_wide_character");
 }
 
 static struct type *
diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index f1d59d2..d44b090 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -10187,7 +10187,7 @@ ada_resolvable::replace (operation_up &&owner,
   return std::move (owner);
 }
 
-/* Convert the character literal whose ASCII value would be VAL to the
+/* Convert the character literal whose value would be VAL to the
    appropriate value of type TYPE, if there is a translation.
    Otherwise return VAL.  Hence, in an enumeration type ('A', 'B'),
    the literal 'A' (VAL == 65), returns 0.  */
@@ -10195,7 +10195,7 @@ ada_resolvable::replace (operation_up &&owner,
 static LONGEST
 convert_char_literal (struct type *type, LONGEST val)
 {
-  char name[7];
+  char name[12];
   int f;
 
   if (type == NULL)
@@ -10206,8 +10206,12 @@ convert_char_literal (struct type *type, LONGEST val)
 
   if ((val >= 'a' && val <= 'z') || (val >= '0' && val <= '9'))
     xsnprintf (name, sizeof (name), "Q%c", (int) val);
+  else if (val >= 0 && val < 256)
+    xsnprintf (name, sizeof (name), "QU%02x", (unsigned) val);
+  else if (val >= 0 && val < 0x10000)
+    xsnprintf (name, sizeof (name), "QW%04x", (unsigned) val);
   else
-    xsnprintf (name, sizeof (name), "QU%02x", (int) val);
+    xsnprintf (name, sizeof (name), "QWW%08lx", (unsigned long) val);
   size_t len = strlen (name);
   for (f = 0; f < type->num_fields (); f += 1)
     {
@@ -13005,9 +13009,11 @@ public:
     add (arch_integer_type (gdbarch, gdbarch_short_bit (gdbarch),
 			    0, "short_integer"));
     struct type *char_type = arch_character_type (gdbarch, TARGET_CHAR_BIT,
-						  0, "character");
+						  1, "character");
     lai->set_string_char_type (char_type);
     add (char_type);
+    add (arch_character_type (gdbarch, 16, 1, "wide_character"));
+    add (arch_character_type (gdbarch, 32, 1, "wide_wide_character"));
     add (arch_float_type (gdbarch, gdbarch_float_bit (gdbarch),
 			  "float", gdbarch_float_format (gdbarch)));
     add (arch_float_type (gdbarch, gdbarch_double_bit (gdbarch),
diff --git a/gdb/ada-lex.l b/gdb/ada-lex.l
index d64496a..f61efba 100644
--- a/gdb/ada-lex.l
+++ b/gdb/ada-lex.l
@@ -1,4 +1,4 @@
-/* FLEX lexer for Ada expressions, for GDB.
+/* FLEX lexer for Ada expressions, for GDB. -*- c++ -*-
    Copyright (C) 1994-2022 Free Software Foundation, Inc.
 
    This file is part of GDB.
@@ -150,20 +150,22 @@ static int paren_depth;
 		}
 
 <INITIAL>"'"({GRAPHIC}|\")"'" {
-		   yylval.typed_val.type = type_char (pstate);
 		   yylval.typed_val.val = yytext[1];
+		   yylval.typed_val.type = type_for_char (pstate, yytext[1]);
 		   return CHARLIT;
 		}
 
-<INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
-                   int v;
-                   yylval.typed_val.type = type_char (pstate);
-		   sscanf (yytext+3, "%2x", &v);
+<INITIAL>"'[\""{HEXDIG}{2,}"\"]'"   {
+                   ULONGEST v = strtoulst (yytext+3, nullptr, 16);
 		   yylval.typed_val.val = v;
+                   yylval.typed_val.type = type_for_char (pstate, v);
 		   return CHARLIT;
 		}
 
-\"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
+	/* Bracket sequences of more than 2 digits are matched here, but
+	   processString rejects any whose value exceeds 0xff: there is
+	   currently no support for wide or wide-wide strings.  */
+\"({GRAPHIC}|"[\""({HEXDIG}{2,}|\")"\"]")*\"   {
 	           yylval.sval = processString (yytext+1, yyleng-2);
 		   return STRING;
 		}
@@ -513,10 +515,12 @@ processString (const char *text, int len)
 	     }
            else
 	     {
-               int chr;
-	       sscanf (p+2, "%2x", &chr);
+	       const char *end;
+	       ULONGEST chr = strtoulst (p + 2, &end, 16);
+	       if (chr > 0xff)
+		 error (_("wide strings are not yet supported"));
 	       *q = (char) chr;
-	       p += 5;
+	       p = end + 1;
 	     }
          }
        else
diff --git a/gdb/ada-valprint.c b/gdb/ada-valprint.c
index a59c392..bf95719 100644
--- a/gdb/ada-valprint.c
+++ b/gdb/ada-valprint.c
@@ -277,7 +277,11 @@ ada_emit_char (int c, struct type *type, struct ui_file *stream,
 	fprintf_filtered (stream, "%c", c);
     }
   else
-    fprintf_filtered (stream, "[\"%0*x\"]", type_len * 2, c);
+    {
+      /* Follow GNAT's lead here and only use 6 digits for
+	 wide_wide_character.  */
+      fprintf_filtered (stream, "[\"%0*x\"]", std::min (6, type_len * 2), c);
+    }
 }
 
 /* Character #I of STRING, given that TYPE_LEN is the size in bytes
diff --git a/gdb/testsuite/gdb.ada/char_enum_unicode.exp b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
index aa81360..fad2399 100644
--- a/gdb/testsuite/gdb.ada/char_enum_unicode.exp
+++ b/gdb/testsuite/gdb.ada/char_enum_unicode.exp
@@ -40,4 +40,13 @@ gdb_test "print Char_Y" " = 2 $y"
 gdb_test "print Char_King" " = 3 $king"
 gdb_test "print Char_Thorn" " = 4 $thorn"
 gdb_test "print Char_Enum_Type'('x')" " = 1 'x'"
+gdb_test "print Char_Enum_Type'('\[\"0178\"\]')" " = 2 $y"
+gdb_test "print Char_Enum_Type'('\[\"1fa00\"\]')" " = 3 $king"
 gdb_test "print Char_Enum_Type'('\[\"de\"\]')" " = 4 $thorn"
+
+gdb_test "print '\[\"0178\"\]'" " = 376 $y"
+gdb_test "print '\[\"01fa00\"\]'" " = 129536 $king"
+gdb_test "print '\[\"de\"\]'" " = 222 $thorn"
+
+gdb_test "print \"\[\"0178\"\]\"" "wide strings are not yet supported"
+gdb_test "print \"\[\"de\"\]\"" " = \"\\\[\"de\"\\\]\""
diff --git a/gdb/testsuite/gdb.ada/widewide.exp b/gdb/testsuite/gdb.ada/widewide.exp
index 6fabb5b..c0268f9 100644
--- a/gdb/testsuite/gdb.ada/widewide.exp
+++ b/gdb/testsuite/gdb.ada/widewide.exp
@@ -33,9 +33,9 @@ if ![runto "foo.adb:$bp_location" ] then {
 
 gdb_test "print some_easy" "= 74 'J'"
 
-gdb_test "print some_larger" "= 48879 '\\\[\"0000beef\"\\\]'"
+gdb_test "print some_larger" "= 48879 '\\\[\"00beef\"\\\]'"
 
-gdb_test "print some_big" "= 14335727 '\\\[\"00dabeef\"\\\]'"
+gdb_test "print some_big" "= 14335727 '\\\[\"dabeef\"\\\]'"
 
 gdb_test "print my_wws" "= \" helo\""
author	Tom Tromey <tromey@adacore.com>	2022-01-26 07:11:18 -0700
committer	Tom Tromey <tromey@adacore.com>	2022-02-28 10:49:29 -0700
commit	c9f66f0005000492739dd063ea2949045bf70bc6 (patch)
tree	7298f37b0e63711abc336fa0695d80a67d5c9144
parent	a7041de85a0cc43b86989eb697cef7a6cecdbdb7 (diff)
download	gdb-c9f66f0005000492739dd063ea2949045bf70bc6.zip gdb-c9f66f0005000492739dd063ea2949045bf70bc6.tar.gz gdb-c9f66f0005000492739dd063ea2949045bf70bc6.tar.bz2