Make strcmp_iw NOT ignore whitespace in the middle of tokens

Currently, "b func tion" manages to set a breakpoint at "function"! All these years I had never noticed this, but now that the linespec completer actually works, this easily happens by accident, with: "b func t<tab>" expecting to get "thread", but getting instead: "b func tion" ... Also, this: "b rettypefunc<int>" manages to set a breakpoint on "rettype func<int>()". These things happen due to strcmp_iw "magic". Fix it by teaching strcmp_iw when it can skip whitespace. This required handling user-defined operators and scope operators, complicating the code a bit, unfortunately. I added unit tests for all the corner cases I stumbled on as I was developing this, and then in the end wrote a testsuite testcase covering many of the same things and more (to be added later). gdb/ChangeLog: 2017-11-24 Pedro Alves <palves@redhat.com> * cp-support.c (cp_symbol_name_matches_1): New, factored out from cp_fq_symbol_name_matches. Pass language_cplus to strncmp_iw_with_mode. (cp_fq_symbol_name_matches): Call cp_symbol_name_matches_1. (selftests::test_cp_symbol_name_cmp): New. (_initialize_cp_support): Register "cp_symbol_name_matches" selftests. * language.c (default_symbol_name_matcher): Pass language_minimal to strncmp_iw_with_mode. * utils.c: Include "cp-support.h" and <algorithm>. (valid_identifier_name_char, cp_skip_operator_token, skip_ws) (cp_is_operator): New functions. (strncmp_iw_with_mode): Use them. Add language parameter. Don't skip whitespace in the symbol name when the lookup name doesn't have spaces, and vice versa. (strncmp_iw, strcmp_iw): Pass language to strncmp_iw_with_mode. * utils.h (strncmp_iw_with_mode): Add language parameter.
author: Pedro Alves <palves@redhat.com> 2017-11-24 23:30:04 +0000
committer: Pedro Alves <palves@redhat.com> 2017-11-24 23:30:04 +0000
commit: 0662b6a7c1b3b04a4ca31a09af703c91c7aa9646 (patch)
tree: f4d2777ed27434ce96a3130325e36fd6dcc29b6f /gdb/utils.c
parent: 276da9b31bd6e3eb8d1dd814c867266f59f29093 (diff)
download: gdb-0662b6a7c1b3b04a4ca31a09af703c91c7aa9646.zip
gdb-0662b6a7c1b3b04a4ca31a09af703c91c7aa9646.tar.gz
gdb-0662b6a7c1b3b04a4ca31a09af703c91c7aa9646.tar.bz2
1 files changed, 226 insertions, 7 deletions
diff --git a/gdb/utils.c b/gdb/utils.c
index b5c011b..3e817ed 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -68,6 +68,8 @@
 #include "job-control.h"
 #include "common/selftest.h"
 #include "common/gdb_optional.h"
+#include "cp-support.h"
+#include <algorithm>
 
 #if !HAVE_DECL_MALLOC
 extern PTR malloc ();		/* ARI: PTR */
@@ -2156,22 +2158,233 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name,
     }
 }
 
+/* True if CH can be part of a symbol name: a letter, a digit, or '_'.
+   CH may be a sign-extended char; it is narrowed before isalnum.  */
+
+static bool
+valid_identifier_name_char (int ch)
+{
+  return (isalnum ((unsigned char) ch) || ch == '_');
+}
+
+/* Return a pointer just past the token that starts at TOKEN, or END,
+   whichever comes first.  TOKEN is assumed to be in a C++ operator name.  */
+
+static const char *
+cp_skip_operator_token (const char *token, const char *end)
+{
+  const char *p = token;
+  if (p != end && !isspace ((unsigned char) *p) && *p != '(')
+    {
+      if (valid_identifier_name_char (*p))
+	{
+	  while (p != end && valid_identifier_name_char (*p))
+	    p++;
+	  return p;
+	}
+      else
+	{
+	  /* Ordered so that among ops sharing a prefix the longer comes
+	     first; the loop below returns on the first match.  Compares
+	     are clamped at END, so a truncated op still matches.  */
+	  static const char *const ops[] =
+	    {
+	      "[",
+	      "]",
+	      "~",
+	      ",",
+	      "-=", "--", "->", "-",
+	      "+=", "++", "+",
+	      "*=", "*",
+	      "/=", "/",
+	      "%=", "%",
+	      "|=", "||", "|",
+	      "&=", "&&", "&",
+	      "^=", "^",
+	      "!=", "!",
+	      "<<=", "<=", "<<", "<",
+	      ">>=", ">=", ">>", ">",
+	      "==", "=",
+	    };
+
+	  for (const char *op : ops)
+	    {
+	      size_t oplen = strlen (op);
+	      size_t lencmp = std::min<size_t> (oplen, end - p);
+
+	      if (strncmp (p, op, lencmp) == 0)
+		return p + lencmp;
+	    }
+	  /* Not a known operator.  Consume one character.  */
+	  return p + 1;
+	}
+    }
+  /* At END, on whitespace, or on '(': the token is empty.  */
+  return p;
+}
+
+/* Advance STRING1/STRING2 past whitespace; STRING2 stops at END_STR2.  */
+
+static void
+skip_ws (const char *&string1, const char *&string2, const char *end_str2)
+{
+  while (isspace ((unsigned char) *string1))
+    string1++;
+  while (string2 < end_str2 && isspace ((unsigned char) *string2))
+    string2++;
+}
+
+/* True if STRING points at a C++ operator name, i.e. CP_OPERATOR_STR
+   standing as a whole word.  START is the beginning of the buffer that
+   STRING points into, so no character before START is ever read.  */
+
+static bool
+cp_is_operator (const char *string, const char *start)
+{
+  if ((string != start && valid_identifier_name_char (string[-1]))
+      || strncmp (string, CP_OPERATOR_STR, CP_OPERATOR_LEN) != 0)
+    return false;
+  return !valid_identifier_name_char (string[CP_OPERATOR_LEN]);
+}
+
 /* See utils.h.  */
 
 int
 strncmp_iw_with_mode (const char *string1, const char *string2,
-		      size_t string2_len, strncmp_iw_mode mode)
+		      size_t string2_len, strncmp_iw_mode mode,
+		      enum language language)
 {
+  const char *string1_start = string1;
   const char *end_str2 = string2 + string2_len;
+  bool skip_spaces = true;
+  bool have_colon_op = (language == language_cplus
+			|| language == language_rust
+			|| language == language_fortran);
 
   while (1)
     {
-      while (isspace (*string1))
-	string1++;
-      while (string2 < end_str2 && isspace (*string2))
-	string2++;
+      if (skip_spaces
+	  || ((isspace (*string1) && !valid_identifier_name_char (*string2))
+	      || (isspace (*string2) && !valid_identifier_name_char (*string1))))
+	{
+	  skip_ws (string1, string2, end_str2);
+	  skip_spaces = false;
+	}
+
       if (*string1 == '\0' || string2 == end_str2)
 	break;
+
+      /* Handle the :: operator.  */
+      if (have_colon_op && string1[0] == ':' && string1[1] == ':')
+	{
+	  if (*string2 != ':')
+	    return 1;
+
+	  string1++;
+	  string2++;
+
+	  if (string2 == end_str2)
+	    break;
+
+	  if (*string2 != ':')
+	    return 1;
+
+	  string1++;
+	  string2++;
+
+	  while (isspace (*string1))
+	    string1++;
+	  while (string2 < end_str2 && isspace (*string2))
+	    string2++;
+	  continue;
+	}
+
+      /* Handle C++ user-defined operators.  */
+      else if (language == language_cplus
+	       && *string1 == 'o')
+	{
+	  if (cp_is_operator (string1, string1_start))
+	    {
+	      /* An operator name in STRING1.  Check STRING2.  */
+	      size_t cmplen
+		= std::min<size_t> (CP_OPERATOR_LEN, end_str2 - string2);
+	      if (strncmp (string1, string2, cmplen) != 0)
+		return 1;
+
+	      string1 += cmplen;
+	      string2 += cmplen;
+
+	      if (string2 != end_str2)
+		{
+		  /* Check for "operatorX" in STRING2.  */
+		  if (valid_identifier_name_char (*string2))
+		    return 1;
+
+		  skip_ws (string1, string2, end_str2);
+		}
+
+	      /* Handle operator().  */
+	      if (*string1 == '(')
+		{
+		  if (string2 == end_str2)
+		    {
+		      if (mode == strncmp_iw_mode::NORMAL)
+			return 0;
+		      else
+			{
+			  /* Don't break for the regular return at the
+			     bottom, because "operator" should not
+			     match "operator()", since this open
+			     parentheses is not the parameter list
+			     start.  */
+			  return *string1 != '\0';
+			}
+		    }
+
+		  if (*string1 != *string2)
+		    return 1;
+
+		  string1++;
+		  string2++;
+		}
+
+	      while (1)
+		{
+		  skip_ws (string1, string2, end_str2);
+
+		  /* Skip to end of token, or to END, whatever comes
+		     first.  */
+		  const char *end_str1 = string1 + strlen (string1);
+		  const char *p1 = cp_skip_operator_token (string1, end_str1);
+		  const char *p2 = cp_skip_operator_token (string2, end_str2);
+
+		  cmplen = std::min (p1 - string1, p2 - string2);
+		  if (p2 == end_str2)
+		    {
+		      if (strncmp (string1, string2, cmplen) != 0)
+			return 1;
+		    }
+		  else
+		    {
+		      if (p1 - string1 != p2 - string2)
+			return 1;
+		      if (strncmp (string1, string2, cmplen) != 0)
+			return 1;
+		    }
+
+		  string1 += cmplen;
+		  string2 += cmplen;
+
+		  if (*string1 == '\0' || string2 == end_str2)
+		    break;
+		  if (*string1 == '(' || *string2 == '(')
+		    break;
+		}
+
+	      continue;
+	    }
+	}
+
       if (case_sensitivity == case_sensitive_on && *string1 != *string2)
 	break;
       if (case_sensitivity == case_sensitive_off
@@ -2179,6 +2392,12 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
 	      != tolower ((unsigned char) *string2)))
 	break;
 
+      /* If we see any non-whitespace, non-identifier-name character
+	 (any of "()<>*&" etc.), then skip spaces the next time
+	 around.  */
+      if (!isspace (*string1) && !valid_identifier_name_char (*string1))
+	skip_spaces = true;
+
       string1++;
       string2++;
     }
@@ -2200,7 +2419,7 @@ int
 strncmp_iw (const char *string1, const char *string2, size_t string2_len)
 {
   return strncmp_iw_with_mode (string1, string2, string2_len,
-			       strncmp_iw_mode::NORMAL);
+			       strncmp_iw_mode::NORMAL, language_minimal);
 }
 
 /* See utils.h.  */
@@ -2209,7 +2428,7 @@ int
 strcmp_iw (const char *string1, const char *string2)
 {
   return strncmp_iw_with_mode (string1, string2, strlen (string2),
-			       strncmp_iw_mode::MATCH_PARAMS);
+			       strncmp_iw_mode::MATCH_PARAMS, language_minimal);
 }
 
 /* This is like strcmp except that it ignores whitespace and treats
author	Pedro Alves <palves@redhat.com>	2017-11-24 23:30:04 +0000
committer	Pedro Alves <palves@redhat.com>	2017-11-24 23:30:04 +0000
commit	0662b6a7c1b3b04a4ca31a09af703c91c7aa9646 (patch)
tree	f4d2777ed27434ce96a3130325e36fd6dcc29b6f /gdb/utils.c
parent	276da9b31bd6e3eb8d1dd814c867266f59f29093 (diff)
download	gdb-0662b6a7c1b3b04a4ca31a09af703c91c7aa9646.zip gdb-0662b6a7c1b3b04a4ca31a09af703c91c7aa9646.tar.gz gdb-0662b6a7c1b3b04a4ca31a09af703c91c7aa9646.tar.bz2