Don't lex floating-point number in Rust field expression

Consider this Rust tuple: let tuple_tuple = ((23i32, 24i32), 25i32); Here, the value is a tuple whose first element is also a tuple. You should be able to print this with: (gdb) print tuple_tuple.0.1 However, currently the Rust lexer sees "0.1" as a floating-point number. This patch fixes the problem by introducing a special case in the lexer: when parsing a field expression, the parser informs the lexer that a number should be handled as a decimal integer only. This change then lets us remove the decimal integer special case from lex_number. v2: I realized that the other DECIMAL_INTEGER cases aren't needed any more. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=32472
author: Tom Tromey <tromey@adacore.com> 2024-12-17 12:35:44 -0700
committer: Tom Tromey <tromey@adacore.com> 2025-01-06 07:03:40 -0700
commit: 350609bb98cee92c6b4fbd334b99066683b9e5c1 (patch)
tree: c39e570d4c52168791b998dfd25fb6ab4579dcc1
parent: 03736207c6c81713c9ed749877de1c0e0ff3ffee (diff)
download: gdb-350609bb98cee92c6b4fbd334b99066683b9e5c1.zip
gdb-350609bb98cee92c6b4fbd334b99066683b9e5c1.tar.gz
gdb-350609bb98cee92c6b4fbd334b99066683b9e5c1.tar.bz2
3 files changed, 55 insertions, 40 deletions
diff --git a/gdb/rust-parse.c b/gdb/rust-parse.c
index b44f119..490adb3 100644
--- a/gdb/rust-parse.c
+++ b/gdb/rust-parse.c
@@ -235,27 +235,31 @@ struct rust_parser
   std::string super_name (const std::string &ident, unsigned int n_supers);
 
   int lex_character ();
+  int lex_decimal_integer ();
   int lex_number ();
   int lex_string ();
   int lex_identifier ();
   uint32_t lex_hex (int min, int max);
   uint32_t lex_escape (bool is_byte);
   int lex_operator ();
-  int lex_one_token ();
+  int lex_one_token (bool decimal_only);
   void push_back (char c);
 
   /* The main interface to lexing.  Lexes one token and updates the
-     internal state.  */
-  void lex ()
+     internal state.  DECIMAL_ONLY is true in the special case where
+     we want to tell the lexer not to parse a number as a float, but
+     instead only as a decimal integer.  See parse_field.  */
+  void lex (bool decimal_only = false)
   {
-    current_token = lex_one_token ();
+    current_token = lex_one_token (decimal_only);
   }
 
-  /* Assuming the current token is TYPE, lex the next token.  */
-  void assume (int type)
+  /* Assuming the current token is TYPE, lex the next token.
+     DECIMAL_ONLY is passed to 'lex', which see.  */
+  void assume (int type, bool decimal_only = false)
   {
     gdb_assert (current_token == type);
-    lex ();
+    lex (decimal_only);
   }
 
   /* Require the single-character token C, and lex the next token; or
@@ -898,6 +902,26 @@ rust_parser::lex_operator ()
   return *pstate->lexptr++;
 }
 
+/* Lex a decimal integer.  */
+
+int
+rust_parser::lex_decimal_integer ()
+{
+  gdb_assert (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9');
+
+  std::string copy;
+  while (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
+    {
+      copy.push_back (pstate->lexptr[0]);
+      ++pstate->lexptr;
+    }
+
+  /* No need to set the value's type in this situation.  */
+  current_int_val.val.set (copy.c_str (), 10);
+
+  return DECIMAL_INTEGER;
+}
+
 /* Lex a number.  */
 
 int
@@ -906,7 +930,6 @@ rust_parser::lex_number ()
   regmatch_t subexps[NUM_SUBEXPRESSIONS];
   int match;
   bool is_integer = false;
-  bool could_be_decimal = true;
   bool implicit_i32 = false;
   const char *type_name = NULL;
   struct type *type;
@@ -929,10 +952,7 @@ rust_parser::lex_number ()
 	  implicit_i32 = true;
 	}
       else
-	{
-	  type_index = INT_TYPE;
-	  could_be_decimal = false;
-	}
+	type_index = INT_TYPE;
     }
   else if (subexps[FLOAT_TYPE1].rm_so != -1)
     {
@@ -968,7 +988,6 @@ rust_parser::lex_number ()
 	  is_integer = true;
 	  end_index = subexps[0].rm_eo;
 	  type_name = "i32";
-	  could_be_decimal = true;
 	  implicit_i32 = true;
 	}
     }
@@ -992,9 +1011,7 @@ rust_parser::lex_number ()
   std::string number;
   for (int i = 0; i < end_index && pstate->lexptr[i]; ++i)
     {
-      if (pstate->lexptr[i] == '_')
-	could_be_decimal = false;
-      else
+      if (pstate->lexptr[i] != '_')
 	number.push_back (pstate->lexptr[i]);
     }
 
@@ -1016,10 +1033,7 @@ rust_parser::lex_number ()
 	  else if (number[1] == 'b')
 	    radix = 2;
 	  if (radix != 10)
-	    {
-	      offset = 2;
-	      could_be_decimal = false;
-	    }
+	    offset = 2;
 	}
 
       if (!current_int_val.val.set (number.c_str () + offset, radix))
@@ -1049,13 +1063,13 @@ rust_parser::lex_number ()
       gdb_assert (parsed);
     }
 
-  return is_integer ? (could_be_decimal ? DECIMAL_INTEGER : INTEGER) : FLOAT;
+  return is_integer ? INTEGER : FLOAT;
 }
 
 /* The lexer.  */
 
 int
-rust_parser::lex_one_token ()
+rust_parser::lex_one_token (bool decimal_only)
 {
   /* Skip all leading whitespace.  */
   while (pstate->lexptr[0] == ' '
@@ -1082,7 +1096,7 @@ rust_parser::lex_one_token ()
     }
 
   if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
-    return lex_number ();
+    return decimal_only ? lex_decimal_integer () : lex_number ();
   else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'')
     return lex_character ();
   else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"')
@@ -1537,7 +1551,7 @@ rust_parser::parse_addr ()
 operation_up
 rust_parser::parse_field (operation_up &&lhs)
 {
-  assume ('.');
+  assume ('.', true);
 
   operation_up result;
   switch (current_token)
@@ -1567,9 +1581,6 @@ rust_parser::parse_field (operation_up &&lhs)
       }
       break;
 
-    case INTEGER:
-      error (_("'_' not allowed in integers in anonymous field references"));
-
     default:
       error (_("field name expected"));
     }
@@ -1663,7 +1674,7 @@ rust_parser::parse_array_type ()
   struct type *elt_type = parse_type ();
   require (';');
 
-  if (current_token != INTEGER && current_token != DECIMAL_INTEGER)
+  if (current_token != INTEGER)
     error (_("integer expected"));
   ULONGEST val = current_int_val.val.as_integer<ULONGEST> ();
   lex ();
@@ -2036,7 +2047,6 @@ rust_parser::parse_atom (bool required)
       break;
 
     case INTEGER:
-    case DECIMAL_INTEGER:
       result = make_operation<long_const_operation> (current_int_val.type,
 						     current_int_val.val);
       lex ();
@@ -2177,12 +2187,12 @@ rust_lex_test_one (rust_parser *parser, const char *input, int expected)
 
   parser->reset (input);
 
-  token = parser->lex_one_token ();
+  token = parser->lex_one_token (false);
   SELF_CHECK (token == expected);
 
   if (token)
     {
-      token = parser->lex_one_token ();
+      token = parser->lex_one_token (false);
       SELF_CHECK (token == 0);
     }
 }
@@ -2236,7 +2246,7 @@ rust_lex_test_sequence (rust_parser *parser, const char *input, int len,
 
   for (int i = 0; i < len; ++i)
     {
-      int token = parser->lex_one_token ();
+      int token = parser->lex_one_token (false);
       SELF_CHECK (token == expected[i]);
     }
 }
@@ -2246,10 +2256,10 @@ rust_lex_test_sequence (rust_parser *parser, const char *input, int len,
 static void
 rust_lex_test_trailing_dot (rust_parser *parser)
 {
-  const int expected1[] = { DECIMAL_INTEGER, '.', IDENT, '(', ')', 0 };
+  const int expected1[] = { INTEGER, '.', IDENT, '(', ')', 0 };
   const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 };
   const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 };
-  const int expected4[] = { DECIMAL_INTEGER, DOTDOT, DECIMAL_INTEGER, 0 };
+  const int expected4[] = { INTEGER, DOTDOT, INTEGER, 0 };
 
   rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1);
   rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2),
@@ -2285,16 +2295,16 @@ rust_lex_test_push_back (rust_parser *parser)
 
   parser->reset (">>=");
 
-  token = parser->lex_one_token ();
+  token = parser->lex_one_token (false);
   SELF_CHECK (token == COMPOUND_ASSIGN);
   SELF_CHECK (parser->current_opcode == BINOP_RSH);
 
   parser->push_back ('=');
 
-  token = parser->lex_one_token ();
+  token = parser->lex_one_token (false);
   SELF_CHECK (token == '=');
 
-  token = parser->lex_one_token ();
+  token = parser->lex_one_token (false);
   SELF_CHECK (token == 0);
 }
 
@@ -2352,7 +2362,7 @@ rust_lex_tests (void)
   rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal");
   rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal");
 
-  rust_lex_int_test (&parser, "23", 23, DECIMAL_INTEGER);
+  rust_lex_int_test (&parser, "23", 23, INTEGER);
   rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER);
   rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER);
   rust_lex_int_test (&parser, "23usize", 23, INTEGER);
@@ -2377,7 +2387,7 @@ rust_lex_tests (void)
   rust_lex_stringish_test (&parser, "thread", "thread", IDENT);
   rust_lex_stringish_test (&parser, "r#true", "true", IDENT);
 
-  const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 };
+  const int expected1[] = { IDENT, INTEGER, 0 };
   rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1),
 			  expected1);
   const int expected2[] = { IDENT, '#', 0 };
diff --git a/gdb/testsuite/gdb.rust/simple.exp b/gdb/testsuite/gdb.rust/simple.exp
index 37a2e07..e269e41 100644
--- a/gdb/testsuite/gdb.rust/simple.exp
+++ b/gdb/testsuite/gdb.rust/simple.exp
@@ -138,7 +138,7 @@ gdb_test "print z.1" " = 8"
 
 # Some error checks.
 gdb_test "print z.1_0" \
-    "'_' not allowed in integers in anonymous field references"
+    "Syntax error near '_0'"
 gdb_test "print z.mut" "field name expected"
 
 gdb_test "print univariant" " = simple::Univariant::Foo{a: 1}"
@@ -424,3 +424,6 @@ gdb_test "print \$one = \$two = 75" " = \\\(\\\)"
 
 gdb_test "info symbol 0xffffffffffffffff" \
     "No symbol matches 0xffffffffffffffff."
+
+# This used to confound the lexer into thinking "0.1" is a float.
+gdb_test "print tuple_tuple.0.1" " = 24"
diff --git a/gdb/testsuite/gdb.rust/simple.rs b/gdb/testsuite/gdb.rust/simple.rs
index 2dcbea2..d9d6b32 100644
--- a/gdb/testsuite/gdb.rust/simple.rs
+++ b/gdb/testsuite/gdb.rust/simple.rs
@@ -181,6 +181,8 @@ fn main () {
 
     let nonzero_offset = EnumWithNonzeroOffset { a: Some(1), b: None };
 
+    let tuple_tuple = ((23i32, 24i32), 25i32);
+
     println!("{}, {}", x.0, x.1);        // set breakpoint here
     println!("{}", diff2(92, 45));
     empty();
author	Tom Tromey <tromey@adacore.com>	2024-12-17 12:35:44 -0700
committer	Tom Tromey <tromey@adacore.com>	2025-01-06 07:03:40 -0700
commit	350609bb98cee92c6b4fbd334b99066683b9e5c1 (patch)
tree	c39e570d4c52168791b998dfd25fb6ab4579dcc1
parent	03736207c6c81713c9ed749877de1c0e0ff3ffee (diff)
download	gdb-350609bb98cee92c6b4fbd334b99066683b9e5c1.zip gdb-350609bb98cee92c6b4fbd334b99066683b9e5c1.tar.gz gdb-350609bb98cee92c6b4fbd334b99066683b9e5c1.tar.bz2