Change how C parser looks up type tag names

In an earlier version of this series, Simon noticed that something like "ptype struct type" was much slower. The deep problem here is the hack in best_symbol -- in this particular case the C parser is including SEARCH_VAR_DOMAIN, which means that a type is not "best". Fixing best_symbol would be good to do, but it's difficult and I already had many struggles getting this series to this point. So, rather than fix that, I elected to modify the parser. The key insight here is that the name lookup in classify_name is useless when a tag name is expected. This patch adds a flag so that this lookup is not done. (Incidentally, this kind of thing is much more straightforward in a recursive descent parser, where something like classify_name would only be applied post-lexing where appropriate.) This change speeds up the lookup by avoiding the best_symbol hack, instead searching only for a type. Acked-By: Simon Marchi <simon.marchi@efficios.com> Reviewed-By: Guinevere Larsen <guinevere@redhat.com>
author: Tom Tromey <tom@tromey.com> 2025-08-05 17:32:55 -0600
committer: Tom Tromey <tom@tromey.com> 2025-09-08 14:06:10 -0600
commit: ed1057031bd38df25878ade56740bf79b720f5e9 (patch)
tree: 0d65c773437d7536573a1f758a1639e4443ef63a /gdb
parent: b318480f27322c15c6b10c1bab4418e81d44b77e (diff)
download: binutils-ed1057031bd38df25878ade56740bf79b720f5e9.zip
binutils-ed1057031bd38df25878ade56740bf79b720f5e9.tar.gz
binutils-ed1057031bd38df25878ade56740bf79b720f5e9.tar.bz2
1 files changed, 79 insertions, 58 deletions
diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 8c6e460..4bed177 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -106,6 +106,12 @@ struct c_parse_state
 
   /* The type stack.  */
   struct type_stack type_stack;
+
+  /* When set, a name token is not looked up.  This can be useful when
+     the search domain is known by context.  TYPE_CODE_UNDEF is used
+     to mean "unset" here -- only types with tags (enum, struct,
+     class, union) can use this feature.  */
+  type_code assume_classification = TYPE_CODE_UNDEF;
 };
 
 /* This is set and cleared in c_parse.  */
@@ -171,7 +177,7 @@ static void c_print_token (FILE *file, int type, YYSTYPE value);
 
 %type <voidval> exp exp1 type_exp start variable qualified_name lcurly function_method
 %type <lval> rcurly
-%type <tval> type typebase scalar_type
+%type <tval> type typebase scalar_type tag_name_or_complete
 %type <tvec> nonempty_typelist func_mod parameter_typelist
 /* %type <bval> block */
 
@@ -1504,71 +1510,37 @@ typebase
 			{
 			  $$ = init_complex_type (nullptr, $2);
 			}
-	|	STRUCT name
-			{ $$
-			    = lookup_struct (copy_name ($2).c_str (),
-					     pstate->expression_context_block);
-			}
-	|	STRUCT COMPLETE
+	|	STRUCT
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_STRUCT,
-						       "", 0);
-			  $$ = NULL;
+			  cpstate->assume_classification = TYPE_CODE_STRUCT;
 			}
-	|	STRUCT name COMPLETE
+		tag_name_or_complete
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_STRUCT,
-						       $2.ptr, $2.length);
-			  $$ = NULL;
-			}
-	|	CLASS name
-			{ $$ = lookup_struct
-			    (copy_name ($2).c_str (),
-			     pstate->expression_context_block);
+			  $$ = $3;
 			}
-	|	CLASS COMPLETE
+	|	CLASS
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_STRUCT,
-						       "", 0);
-			  $$ = NULL;
+			  cpstate->assume_classification = TYPE_CODE_STRUCT;
 			}
-	|	CLASS name COMPLETE
+		tag_name_or_complete
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_STRUCT,
-						       $2.ptr, $2.length);
-			  $$ = NULL;
+			  $$ = $3;
 			}
-	|	UNION name
-			{ $$
-			    = lookup_union (copy_name ($2).c_str (),
-					    pstate->expression_context_block);
-			}
-	|	UNION COMPLETE
+	|	ENUM
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_UNION,
-						       "", 0);
-			  $$ = NULL;
+			  cpstate->assume_classification = TYPE_CODE_ENUM;
 			}
-	|	UNION name COMPLETE
+		tag_name_or_complete
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_UNION,
-						       $2.ptr, $2.length);
-			  $$ = NULL;
-			}
-	|	ENUM name
-			{ $$ = lookup_enum (copy_name ($2).c_str (),
-					    pstate->expression_context_block);
+			  $$ = $3;
 			}
-	|	ENUM COMPLETE
+	|	UNION
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_ENUM, "", 0);
-			  $$ = NULL;
+			  cpstate->assume_classification = TYPE_CODE_UNION;
 			}
-	|	ENUM name COMPLETE
+		tag_name_or_complete
 			{
-			  pstate->mark_completion_tag (TYPE_CODE_ENUM, $2.ptr,
-						       $2.length);
-			  $$ = NULL;
+			  $$ = $3;
 			}
 		/* It appears that this rule for templates is never
 		   reduced; template recognition happens by lookahead
@@ -1809,6 +1781,47 @@ field_name
 	|	UNSIGNED { $$ = typename_stoken ("unsigned"); }
 	;
 
+/* This rule is used when the preceding token is a keyword that takes
+   a tag name (e.g., "struct").  The "caller" should disable name
+   lookup, see c_parse_state::assume_classification.  */
+tag_name_or_complete
+	:	NAME
+		{
+		  switch (cpstate->assume_classification)
+		    {
+		    case TYPE_CODE_STRUCT:
+		      $$ = lookup_struct (copy_name ($1.stoken).c_str (),
+					  pstate->expression_context_block);
+		      break;
+		    case TYPE_CODE_ENUM:
+		      $$ = lookup_enum (copy_name ($1.stoken).c_str (),
+					pstate->expression_context_block);
+		      break;
+		    case TYPE_CODE_UNION:
+		      $$ = lookup_union (copy_name ($1.stoken).c_str (),
+					 pstate->expression_context_block);
+		      break;
+		    default:
+		      gdb_assert_not_reached ();
+		    }
+		  cpstate->assume_classification = TYPE_CODE_UNDEF;
+		}
+	|	COMPLETE
+		{
+		  pstate->mark_completion_tag (cpstate->assume_classification,
+					       "", 0);
+		  cpstate->assume_classification = TYPE_CODE_UNDEF;
+		  $$ = nullptr;
+		}
+	|	NAME COMPLETE
+		{
+		  pstate->mark_completion_tag (cpstate->assume_classification,
+					       $1.stoken.ptr, $1.stoken.length);
+		  cpstate->assume_classification = TYPE_CODE_UNDEF;
+		  $$ = nullptr;
+		}
+	;
+
 name	:	NAME { $$ = $1.stoken; }
 	|	BLOCKNAME { $$ = $1.stoken; }
 	|	TYPENAME { $$ = $1.stoken; }
@@ -3250,12 +3263,15 @@ yylex (void)
      subsequent code is C++-only; but also depends on seeing a "::" or
      name-like token.  */
   current.token = lex_one_token (pstate, &is_quoted_name);
-  if (current.token == NAME)
+  if (cpstate->assume_classification == TYPE_CODE_UNDEF
+      && current.token == NAME)
     current.token = classify_name (pstate, pstate->expression_context_block,
 				   is_quoted_name, last_lex_was_structop);
   if (pstate->language ()->la_language != language_cplus
       || (current.token != TYPENAME && current.token != COLONCOLON
-	  && current.token != FILENAME))
+	  && current.token != FILENAME
+	  && (cpstate->assume_classification == TYPE_CODE_UNDEF
+	      || current.token != NAME)))
     return current.token;
 
   /* Read any sequence of alternating "::" and name-like tokens into
@@ -3296,7 +3312,8 @@ yylex (void)
     search_block = NULL;
   else
     {
-      gdb_assert (current.token == TYPENAME);
+      gdb_assert (current.token == TYPENAME
+		  || cpstate->assume_classification != TYPE_CODE_UNDEF);
       search_block = pstate->expression_context_block;
       obstack_grow (&name_obstack, current.value.sval.ptr,
 		    current.value.sval.length);
@@ -3319,8 +3336,11 @@ yylex (void)
 	  int classification;
 
 	  yylval = next.value;
-	  classification = classify_inner_name (pstate, search_block,
-						context_type);
+	  if (cpstate->assume_classification != TYPE_CODE_UNDEF)
+	    classification = NAME;
+	  else
+	    classification = classify_inner_name (pstate, search_block,
+						  context_type);
 	  /* We keep going until we either run out of names, or until
 	     we have a qualified name which is not a type.  */
 	  if (classification != TYPENAME && classification != NAME)
@@ -3330,7 +3350,7 @@ yylex (void)
 	  checkpoint = next_to_examine;
 
 	  /* Update the partial name we are constructing.  */
-	  if (context_type != NULL)
+	  if (next_to_examine > 1)
 	    {
 	      /* We don't want to put a leading "::" into the name.  */
 	      obstack_grow_str (&name_obstack, "::");
@@ -3345,7 +3365,8 @@ yylex (void)
 
 	  last_was_coloncolon = 0;
 
-	  if (classification == NAME)
+	  if (cpstate->assume_classification == TYPE_CODE_UNDEF
+	      && classification == NAME)
 	    break;
 
 	  context_type = yylval.tsym.type;
author	Tom Tromey <tom@tromey.com>	2025-08-05 17:32:55 -0600
committer	Tom Tromey <tom@tromey.com>	2025-09-08 14:06:10 -0600
commit	ed1057031bd38df25878ade56740bf79b720f5e9 (patch)
tree	0d65c773437d7536573a1f758a1639e4443ef63a /gdb
parent	b318480f27322c15c6b10c1bab4418e81d44b77e (diff)
download	binutils-ed1057031bd38df25878ade56740bf79b720f5e9.zip binutils-ed1057031bd38df25878ade56740bf79b720f5e9.tar.gz binutils-ed1057031bd38df25878ade56740bf79b720f5e9.tar.bz2