1 files changed, 694 insertions, 0 deletions
diff --git a/gdb/break-cond-parse.c b/gdb/break-cond-parse.c
new file mode 100644
index 0000000..04a8895
--- /dev/null
+++ b/gdb/break-cond-parse.c
@@ -0,0 +1,694 @@
+/* Copyright (C) 2023-2025 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+#include "gdbsupport/gdb_assert.h"
+#include "gdbsupport/selftest.h"
+#include "test-target.h"
+#include "scoped-mock-context.h"
+#include "break-cond-parse.h"
+#include "tid-parse.h"
+#include "ada-lang.h"
+#include "exceptions.h"
+
+/* When parsing tokens from a string, which direction are we parsing?
+
+   Given the following string and pointer 'ptr':
+
+	ABC DEF GHI JKL
+	       ^
+	       ptr
+
+   Parsing 'forward' will return the token 'GHI' and update 'ptr' to point
+   between GHI and JKL.  Parsing 'backward' will return the token 'DEF' and
+   update 'ptr' to point between ABC and DEF.
+*/
+
+enum class parse_direction
+{
+  /* Parse the next token forwards.  */
+  forward,
+
+  /* Parse the previous token backwards.  */
+  backward
+};
+
+/* Find the next token in DIRECTION from *CURR.  */
+
+static std::string_view
+find_next_token (const char **curr, parse_direction direction)
+{
+  const char *tok_start, *tok_end;
+
+  gdb_assert (**curr != '\0');
+
+  if (direction == parse_direction::forward)
+    {
+      *curr = skip_spaces (*curr);
+      tok_start = *curr;
+      *curr = skip_to_space (*curr);
+      tok_end = *curr - 1;
+    }
+  else
+    {
+      gdb_assert (direction == parse_direction::backward);
+
+      while (isspace (**curr))
+	--(*curr);
+
+      tok_end = *curr;
+
+      while (!isspace (**curr))
+	--(*curr);
+
+      tok_start = (*curr) + 1;
+    }
+
+  return std::string_view (tok_start, tok_end - tok_start + 1);
+}
+
+/* A class that represents a complete parsed token.  Each token has a type
+   and a std::string_view into the original breakpoint condition string.  */
+
+struct token
+{
+  /* The types a token might take.  */
+  enum class type
+  {
+    /* These are the token types for the 'if', 'thread', 'inferior', and
+       'task' keywords.  The m_content for these token types is the value
+       passed to the keyword, not the keyword itself.  */
+    CONDITION,
+    THREAD,
+    INFERIOR,
+    TASK,
+
+    /* This is the token used when we find unknown content, the m_content
+       for this token is the rest of the input string.  */
+    REST,
+
+    /* This is the token for the -force-condition token, the m_content for
+       this token contains the keyword itself.  */
+    FORCE
+  };
+
+  token (enum type type, std::string_view content)
+    : m_type (type),
+      m_content (std::move (content))
+  {
+    /* Nothing.  */
+  }
+
+  /* Return a string representing this token.  Only used for debug.  */
+  std::string to_string () const
+  {
+    switch (m_type)
+      {
+      case type::CONDITION:
+	return string_printf ("{ CONDITION: \"%s\" }",
+			      std::string (m_content).c_str ());
+      case type::THREAD:
+	return string_printf ("{ THREAD: \"%s\" }",
+			      std::string (m_content).c_str ());
+      case type::INFERIOR:
+	return string_printf ("{ INFERIOR: \"%s\" }",
+			      std::string (m_content).c_str ());
+      case type::TASK:
+	return string_printf ("{ TASK: \"%s\" }",
+			      std::string (m_content).c_str ());
+      case type::REST:
+	return string_printf ("{ REST: \"%s\" }",
+			      std::string (m_content).c_str ());
+      case type::FORCE:
+	return string_printf ("{ FORCE }");
+      default:
+	return "** unknown **";
+      }
+  }
+
+  /* The type of this token.  */
+  const type &get_type () const
+  {
+    return m_type;
+  }
+
+  /* Return the value of this token.  */
+  const std::string_view &get_value () const
+  {
+    gdb_assert (m_content.size () > 0);
+    return m_content;
+  }
+
+  /* Extend this token with the contents of OTHER.  This only makes sense
+     if OTHER is the next token after this one in the original string,
+     however, enforcing that restriction is left to the caller of this
+     function.
+
+     When OTHER is a keyword/value token, e.g. 'thread 1', the m_content
+     for OTHER will only point to the '1'.  However, as the m_content is a
+     std::string_view, then when we merge the m_content of OTHER into this
+     token we automatically merge in the 'thread' part too, as it
+     naturally sits between this token and OTHER.  */
+
+  void
+  extend (const token &other)
+  {
+    m_content = std::string_view (this->m_content.data (),
+				  (other.m_content.data ()
+				   - this->m_content.data ()
+				   + other.m_content.size ()));
+  }
+
+private:
+  /* The type of this token.  */
+  type m_type;
+
+  /* The important content part of this token.  The extend member function
+     depends on this being a std::string_view.  */
+  std::string_view m_content;
+};
+
+/* Split STR, a breakpoint condition string, into a vector of tokens where
+   each token represents a component of the condition.  Tokens are first
+   parsed from the front of STR until we encounter an 'if' token.  At this
+   point tokens are parsed from the end of STR until we encounter an
+   unknown token, which we assume is the other end of the 'if' condition.
+   If when scanning forward we encounter an unknown token then the
+   remainder of STR is placed into a 'rest' token (the rest of the
+   string), and no backward scan is performed.  */
+
+static std::vector<token>
+parse_all_tokens (const char *str)
+{
+  gdb_assert (str != nullptr);
+
+  std::vector<token> forward_results;
+  std::vector<token> backward_results;
+
+  const char *cond_start = nullptr;
+  const char *cond_end = nullptr;
+  parse_direction direction = parse_direction::forward;
+  std::vector<token> *curr_results = &forward_results;
+  while (*str != '\0')
+    {
+      /* Find the next token.  If moving backward and this token starts at
+	 the same location as the condition then we must have found the
+	 other end of the condition string -- we're done.  */
+      std::string_view t = find_next_token (&str, direction);
+      if (direction == parse_direction::backward && t.data () <= cond_start)
+	{
+	  cond_end = &t.back ();
+	  break;
+	}
+
+      /* We only have a single flag option to check for.  All the other
+	 options take a value so require an additional token to be found.
+	 Additionally, we require that this flag be at least '-f', we
+	 don't allow it to be abbreviated to '-'.  */
+      if (t.length () > 1 && startswith ("-force-condition", t))
+	{
+	  curr_results->emplace_back (token::type::FORCE, t);
+	  continue;
+	}
+
+      /* Maybe the first token was the last token in the string.  If this
+	 is the case then we definitely can't try to extract a value
+	 token.  This also means that the token T is meaningless.  Reset
+	 TOK to point at the start of the unknown content and break out of
+	 the loop.  We'll record the unknown part of the string outside of
+	 the scanning loop (below).  */
+      if (direction == parse_direction::forward && *str == '\0')
+	{
+	  str = t.data ();
+	  break;
+	}
+
+      /* As before, find the next token and, if we are scanning backwards,
+	 check that we have not reached the start of the condition string.  */
+      std::string_view v = find_next_token (&str, direction);
+      if (direction == parse_direction::backward && v.data () <= cond_start)
+	{
+	  /* Use token T here as that must also be part of the condition
+	     string.  */
+	  cond_end = &t.back ();
+	  break;
+	}
+
+      /* When moving backward we will first parse the value token then the
+	 keyword token, so swap them now.  */
+      if (direction == parse_direction::backward)
+	std::swap (t, v);
+
+      /* Check for valid option in token T.  If we find a valid option then
+	 parse the value from the token V.  Except for 'if', that's handled
+	 differently.
+
+	 For the 'if' token we need to capture the entire condition
+	 string, so record the start of the condition string and then
+	 start scanning backwards looking for the end of the condition
+	 string.
+
+	 The order of these checks is important, at least the check for
+	 'thread' must occur before the check for 'task'.  We accept
+	 abbreviations of these token names, and 't' should resolve to
+	 'thread', which will only happen if we check 'thread' first.  */
+      if (direction == parse_direction::forward && startswith ("if", t))
+	{
+	  cond_start = v.data ();
+	  str = str + strlen (str);
+	  gdb_assert (*str == '\0');
+	  --str;
+	  direction = parse_direction::backward;
+	  curr_results = &backward_results;
+	  continue;
+	}
+      else if (startswith ("thread", t))
+	curr_results->emplace_back (token::type::THREAD, v);
+      else if (startswith ("inferior", t))
+	curr_results->emplace_back (token::type::INFERIOR, v);
+      else if (startswith ("task", t))
+	curr_results->emplace_back (token::type::TASK, v);
+      else
+	{
+	  /* An unknown token.  If we are scanning forward then reset TOK
+	     to point at the start of the unknown content, we record this
+	     outside of the scanning loop (below).
+
+	     If we are scanning backward then unknown content is assumed to
+	     be the other end of the condition string, obviously, this is
+	     just a heuristic, we could be looking at a mistyped command
+	     line, but this will be spotted when the condition is
+	     eventually evaluated.
+
+	     Either way, no more scanning is required after this.  */
+	  if (direction == parse_direction::forward)
+	    str = t.data ();
+	  else
+	    {
+	      gdb_assert (direction == parse_direction::backward);
+	      cond_end = &v.back ();
+	    }
+	  break;
+	}
+    }
+
+  if (cond_start != nullptr)
+    {
+      /* If we found the start of a condition string then we should have
+	 switched to backward scan mode, and found the end of the condition
+	 string.  Capture the whole condition string into COND_STRING
+	 now.  */
+      gdb_assert (direction == parse_direction::backward);
+      gdb_assert (cond_end != nullptr);
+
+      std::string_view v (cond_start, cond_end - cond_start + 1);
+
+      forward_results.emplace_back (token::type::CONDITION, v);
+    }
+  else if (*str != '\0')
+    {
+      /* If we didn't have a condition start pointer then we should still
+	 be in forward scanning mode.  If we didn't reach the end of the
+	 input string (TOK is not at the null character) then the rest of
+	 the input string is garbage that we didn't understand.
+
+	 Record the unknown content into REST.  The caller of this function
+	 will report this as an error later on.  We could report the error
+	 here, but we prefer to allow the caller to run other checks, and
+	 prioritise other errors before reporting this problem.  */
+      gdb_assert (direction == parse_direction::forward);
+      gdb_assert (cond_end == nullptr);
+
+      std::string_view v (str, strlen (str));
+
+      forward_results.emplace_back (token::type::REST, v);
+    }
+
+  /* If we have tokens in the BACKWARD_RESULTS vector then this means that
+     we found an 'if' condition (which will be the last thing in the
+     FORWARD_RESULTS vector), and then we started a backward scan.
+
+     The last tokens from the input string (those after the 'if' condition)
+     will be the first tokens added to the BACKWARD_RESULTS vector, so the
+     last items in the BACKWARD_RESULTS vector are those next to the 'if'
+     condition.
+
+     Check the tokens in the BACKWARD_RESULTS vector from back to front.
+     If the tokens look invalid then we assume that they are actually part
+     of the 'if' condition, and merge the token with the 'if' condition.
+     If it turns out that this was incorrect and that instead the user just
+     messed up entering the token value, then this will show as an error
+     when parsing the 'if' condition.
+
+     Doing this allows us to handle things like:
+
+       break function if ( variable == thread )
+
+     Where 'thread' is a local variable within 'function'.  When parsing
+     this we will initially see 'thread )' as a thread token with ')' as
+     the value.  However, the following code will spot that ')' is not a
+     valid thread-id, and so we merge 'thread )' into the 'if' condition
+     string.
+
+     This code also handles the special treatment for '-force-condition',
+     which exists for backwards compatibility reasons.  Traditionally this
+     flag, if it occurred immediately after the 'if' condition, would be
+     treated as part of the 'if' condition.  When the breakpoint condition
+     parsing code was rewritten, this behavior was retained.  */
+  gdb_assert (backward_results.empty ()
+	      || (forward_results.back ().get_type ()
+		  == token::type::CONDITION));
+  while (!backward_results.empty ())
+    {
+      token &t = backward_results.back ();
+
+      if (t.get_type () == token::type::FORCE)
+	forward_results.back ().extend (std::move (t));
+      else if (t.get_type () == token::type::THREAD)
+	{
+	  const char *end;
+	  std::string v (t.get_value ());
+	  if (is_thread_id (v.c_str (), &end) && *end == '\0')
+	    break;
+	  forward_results.back ().extend (std::move (t));
+	}
+      else if (t.get_type () == token::type::INFERIOR
+	       || t.get_type () == token::type::TASK)
+	{
+	  /* Place the token's value into a null-terminated string, parse
+	     the string as a number and check that the entire string was
+	     parsed.  If this is true then this looks like a valid inferior
+	     or task number, otherwise, assume an invalid id, and merge
+	     this token with the 'if' token.  */
+	  char *end;
+	  std::string v (t.get_value ());
+	  (void) strtol (v.c_str (), &end, 0);
+	  if (end > v.c_str () && *end == '\0')
+	    break;
+	  forward_results.back ().extend (std::move (t));
+	}
+      else
+	gdb_assert_not_reached ("unexpected token type");
+
+      /* If we found an actual valid token above then we will have broken
+	 out of the loop.  We only get here if the token was merged with
+	 the 'if' condition, in which case we can discard the last token
+	 and then check the token before that.  */
+      backward_results.pop_back ();
+    }
+
+  /* If after the above checks we still have some tokens in the
+     BACKWARD_RESULTS vector, then these need to be appended to the
+     FORWARD_RESULTS vector.  However, we first reverse the order so that
+     FORWARD_RESULTS retains the tokens in the order they appeared in the
+     input string.  */
+  if (!backward_results.empty ())
+    forward_results.insert (forward_results.end (),
+			    backward_results.rbegin (),
+			    backward_results.rend ());
+
+  return forward_results;
+}
+
+/* Called when the global debug_breakpoint is true.  Prints VEC to the
+   debug output stream.  */
+
+static void
+dump_condition_tokens (const std::vector<token> &vec)
+{
+  gdb_assert (debug_breakpoint);
+
+  bool first = true;
+  std::string str = "Tokens: ";
+  for (const token &t : vec)
+    {
+      if (!first)
+	str += " ";
+      first = false;
+      str += t.to_string ();
+    }
+  breakpoint_debug_printf ("%s", str.c_str ());
+}
+
+/* See break-cond-parse.h.  */
+
+void
+create_breakpoint_parse_arg_string
+  (const char *str, gdb::unique_xmalloc_ptr<char> *cond_string_ptr,
+   int *thread_ptr, int *inferior_ptr, int *task_ptr,
+   gdb::unique_xmalloc_ptr<char> *rest_ptr, bool *force_ptr)
+{
+  /* Set up the defaults.  */
+  cond_string_ptr->reset ();
+  rest_ptr->reset ();
+  *thread_ptr = -1;
+  *inferior_ptr = -1;
+  *task_ptr = -1;
+  *force_ptr = false;
+
+  if (str == nullptr)
+    return;
+
+  /* Split STR into a series of tokens.  */
+  std::vector<token> tokens = parse_all_tokens (str);
+  if (debug_breakpoint)
+    dump_condition_tokens (tokens);
+
+  /* Temporary variables.  Initialised to the default state, then updated
+     as we parse TOKENS.  If all of TOKENS is parsed successfully then the
+     state from these variables is copied into the output arguments before
+     the function returns.  */
+  int thread = -1, inferior = -1, task = -1;
+  bool force = false;
+  gdb::unique_xmalloc_ptr<char> cond_string, rest;
+
+  for (const token &t : tokens)
+    {
+      std::string tok_value (t.get_value ());
+      switch (t.get_type ())
+	{
+	case token::type::FORCE:
+	  force = true;
+	  break;
+	case token::type::THREAD:
+	  {
+	    if (thread != -1)
+	      error ("You can specify only one thread.");
+	    if (task != -1 || inferior != -1)
+	      error ("You can specify only one of thread, inferior, or task.");
+	    const char *tmptok;
+	    thread_info *thr = parse_thread_id (tok_value.c_str (), &tmptok);
+	    gdb_assert (*tmptok == '\0');
+	    thread = thr->global_num;
+	  }
+	  break;
+	case token::type::INFERIOR:
+	  {
+	    if (inferior != -1)
+	      error ("You can specify only one inferior.");
+	    if (task != -1 || thread != -1)
+	      error ("You can specify only one of thread, inferior, or task.");
+	    char *tmptok;
+	    long inferior_id = strtol (tok_value.c_str (), &tmptok, 0);
+	    if (*tmptok != '\0')
+	      error (_("Junk '%s' after inferior keyword."), tmptok);
+	    if (inferior_id > INT_MAX)
+	      error (_("No inferior number '%ld'"), inferior_id);
+	    inferior = static_cast<int> (inferior_id);
+	    struct inferior *inf = find_inferior_id (inferior);
+	    if (inf == nullptr)
+	      error (_("No inferior number '%d'"), inferior);
+	  }
+	  break;
+	case token::type::TASK:
+	  {
+	    if (task != -1)
+	      error ("You can specify only one task.");
+	    if (inferior != -1 || thread != -1)
+	      error ("You can specify only one of thread, inferior, or task.");
+	    char *tmptok;
+	    long task_id = strtol (tok_value.c_str (), &tmptok, 0);
+	    if (*tmptok != '\0')
+	      error (_("Junk '%s' after task keyword."), tmptok);
+	    if (task_id > INT_MAX)
+	      error (_("Unknown task %ld"), task_id);
+	    task = static_cast<int> (task_id);
+	    if (!valid_task_id (task))
+	      error (_("Unknown task %d."), task);
+	  }
+	  break;
+	case token::type::CONDITION:
+	  cond_string.reset (savestring (t.get_value ().data (),
+					 t.get_value ().size ()));
+	  break;
+	case token::type::REST:
+	  rest.reset (savestring (t.get_value ().data (),
+				  t.get_value ().size ()));
+	  break;
+	}
+    }
+
+  /* Move results into the output locations.  */
+  *force_ptr = force;
+  *thread_ptr = thread;
+  *inferior_ptr = inferior;
+  *task_ptr = task;
+  rest_ptr->reset (rest.release ());
+  cond_string_ptr->reset (cond_string.release ());
+}
+
+#if GDB_SELF_TEST
+
+namespace selftests {
+
+/* Run a single test of the create_breakpoint_parse_arg_string function.
+   INPUT is passed to create_breakpoint_parse_arg_string while all other
+   arguments are the expected output from
+   create_breakpoint_parse_arg_string.  */
+
+static void
+test (const char *input, const char *condition, int thread = -1,
+      int inferior = -1, int task = -1, bool force = false,
+      const char *rest = nullptr, const char *error_msg = nullptr)
+{
+  gdb::unique_xmalloc_ptr<char> extracted_condition;
+  gdb::unique_xmalloc_ptr<char> extracted_rest;
+  int extracted_thread, extracted_inferior, extracted_task;
+  bool extracted_force_condition;
+  std::string exception_msg;
+
+  if (error_msg == nullptr)
+    error_msg = "";
+
+  try
+    {
+      create_breakpoint_parse_arg_string (input, &extracted_condition,
+					  &extracted_thread,
+					  &extracted_inferior,
+					  &extracted_task, &extracted_rest,
+					  &extracted_force_condition);
+    }
+  catch (const gdb_exception_error &ex)
+    {
+      exception_msg = ex.what ();
+    }
+
+  if ((condition == nullptr) != (extracted_condition.get () == nullptr)
+      || (condition != nullptr
+	  && strcmp (condition, extracted_condition.get ()) != 0)
+      || (rest == nullptr) != (extracted_rest.get () == nullptr)
+      || (rest != nullptr && strcmp (rest, extracted_rest.get ()) != 0)
+      || thread != extracted_thread
+      || inferior != extracted_inferior
+      || task != extracted_task
+      || force != extracted_force_condition
+      || exception_msg != error_msg)
+    {
+      if (run_verbose ())
+	{
+	  debug_printf ("input: '%s'\n", input);
+	  debug_printf ("condition: '%s'\n", extracted_condition.get ());
+	  debug_printf ("rest: '%s'\n", extracted_rest.get ());
+	  debug_printf ("thread: %d\n", extracted_thread);
+	  debug_printf ("inferior: %d\n", extracted_inferior);
+	  debug_printf ("task: %d\n", extracted_task);
+	  debug_printf ("forced: %s\n",
+			extracted_force_condition ? "true" : "false");
+	  debug_printf ("exception: '%s'\n", exception_msg.c_str ());
+	}
+
+      /* Report the failure.  */
+      SELF_CHECK (false);
+    }
+}
+
+/* Wrapper for test function.  Pass through the default values for all
+   parameters, except the last parameter, which indicates that we expect
+   INPUT to trigger an error.  */
+
+static void
+test_error (const char *input, const char *error_msg)
+{
+  test (input, nullptr, -1, -1, -1, false, nullptr, error_msg);
+}
+
+/* Test the create_breakpoint_parse_arg_string function.  Just wraps
+   multiple calls to the test function above.  */
+
+static void
+create_breakpoint_parse_arg_string_tests ()
+{
+  gdbarch *arch = current_inferior ()->arch ();
+  scoped_restore_current_pspace_and_thread restore;
+  scoped_mock_context<test_target_ops> mock_target (arch);
+
+  int global_thread_num = mock_target.mock_thread.global_num;
+
+  /* Test parsing valid breakpoint condition strings.  */
+  test ("  if blah  ", "blah");
+  test (" if blah thread 1", "blah", global_thread_num);
+  test (" if blah inferior 1", "blah", -1, 1);
+  test (" if blah thread 1  ", "blah", global_thread_num);
+  test ("thread 1 woof", nullptr, global_thread_num, -1, -1, false, "woof");
+  test ("thread 1 X", nullptr, global_thread_num, -1, -1, false, "X");
+  test (" if blah thread 1 -force-condition", "blah", global_thread_num,
+	-1, -1, true);
+  test (" -force-condition if blah thread 1", "blah", global_thread_num,
+	-1, -1, true);
+  test (" -force-condition if blah thread 1  ", "blah", global_thread_num,
+	-1, -1, true);
+  test ("thread 1 -force-condition if blah", "blah", global_thread_num,
+	-1, -1, true);
+  test ("if (A::outer::func ())", "(A::outer::func ())");
+  test ("if ( foo == thread )", "( foo == thread )");
+  test ("if ( foo == thread ) inferior 1", "( foo == thread )", -1, 1);
+  test ("if ( foo == thread ) thread 1", "( foo == thread )",
+	global_thread_num);
+  test ("if foo == thread", "foo == thread");
+  test ("if foo == thread 1", "foo ==", global_thread_num);
+
+  /* Test parsing some invalid breakpoint condition strings.  */
+  test_error ("thread 1 if foo == 123 thread 1",
+	      "You can specify only one thread.");
+  test_error ("thread 1 if foo == 123 inferior 1",
+	      "You can specify only one of thread, inferior, or task.");
+  test_error ("thread 1 if foo == 123 task 1",
+	      "You can specify only one of thread, inferior, or task.");
+  test_error ("inferior 1 if foo == 123 inferior 1",
+	      "You can specify only one inferior.");
+  test_error ("inferior 1 if foo == 123 thread 1",
+	      "You can specify only one of thread, inferior, or task.");
+  test_error ("inferior 1 if foo == 123 task 1",
+	      "You can specify only one of thread, inferior, or task.");
+  test_error ("thread 1.2.3", "Invalid thread ID: 1.2.3");
+  test_error ("thread 1/2", "Invalid thread ID: 1/2");
+  test_error ("thread 1xxx", "Invalid thread ID: 1xxx");
+  test_error ("inferior 1xxx", "Junk 'xxx' after inferior keyword.");
+  test_error ("task 1xxx", "Junk 'xxx' after task keyword.");
+}
+
+} /* namespace selftests */
+#endif /* GDB_SELF_TEST */
+
+INIT_GDB_FILE (break_cond_parse)
+{
+#if GDB_SELF_TEST
+  selftests::register_test
+    ("create_breakpoint_parse_arg_string",
+     selftests::create_breakpoint_parse_arg_string_tests);
+#endif
+}