aboutsummaryrefslogtreecommitdiff
path: root/gdb/break-cond-parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'gdb/break-cond-parse.c')
-rw-r--r--gdb/break-cond-parse.c694
1 files changed, 694 insertions, 0 deletions
diff --git a/gdb/break-cond-parse.c b/gdb/break-cond-parse.c
new file mode 100644
index 0000000..04a8895
--- /dev/null
+++ b/gdb/break-cond-parse.c
@@ -0,0 +1,694 @@
+/* Copyright (C) 2023-2025 Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "defs.h"
+#include "gdbsupport/gdb_assert.h"
+#include "gdbsupport/selftest.h"
+#include "test-target.h"
+#include "scoped-mock-context.h"
+#include "break-cond-parse.h"
+#include "tid-parse.h"
+#include "ada-lang.h"
+#include "exceptions.h"
+
+/* When parsing tokens from a string, which direction are we parsing?
+
+ Given the following string and pointer 'ptr':
+
+     ABC DEF GHI JKL
+            ^
+           ptr
+
+ Parsing 'forward' will return the token 'GHI' and update 'ptr' to point
+ between GHI and JKL. Parsing 'backward' will return the token 'DEF' and
+ update 'ptr' to point between ABC and DEF.
+
+ Tokens are runs of non-whitespace characters; see find_next_token for
+ the exact position 'ptr' is left at in each direction.
+*/
+
+enum class parse_direction
+{
+ /* Parse the next token forwards, i.e. toward the end of the string. */
+ forward,
+
+ /* Parse the previous token backwards, i.e. toward the start of the
+ string. */
+ backward
+};
+
+/* Find the next token in DIRECTION from *CURR and return a view of it.
+   The view points into the caller's string, nothing is copied.
+
+   *CURR must not point at the null terminator on entry.
+
+   When DIRECTION is forward the token starts where skip_spaces leaves
+   *CURR and ends where skip_to_space stops; *CURR is updated to the
+   position returned by skip_to_space.  If only whitespace remains then
+   the returned view is empty.
+
+   When DIRECTION is backward *CURR is moved toward the start of the
+   string past any whitespace, the token is the run of non-whitespace
+   characters ending there, and *CURR is left pointing at the whitespace
+   character immediately before the token.  The backward scan performs no
+   bounds checking of its own: the caller must guarantee that there is a
+   non-whitespace character at or before *CURR, and a whitespace character
+   somewhere before that.  */
+
+static std::string_view
+find_next_token (const char **curr, parse_direction direction)
+{
+  const char *tok_start, *tok_end;
+
+  gdb_assert (**curr != '\0');
+
+  if (direction == parse_direction::forward)
+    {
+      *curr = skip_spaces (*curr);
+      tok_start = *curr;
+      *curr = skip_to_space (*curr);
+      tok_end = *curr - 1;
+    }
+  else
+    {
+      gdb_assert (direction == parse_direction::backward);
+
+      /* The <cctype> classification functions have undefined behavior
+         when given a negative value other than EOF, and that is exactly
+         what a plain 'char' holding a byte >= 0x80 becomes where 'char'
+         is signed.  Always convert through 'unsigned char'.  */
+      auto is_space = [] (char c)
+        {
+          return isspace (static_cast<unsigned char> (c)) != 0;
+        };
+
+      /* Step back over any whitespace after the token.  */
+      while (is_space (**curr))
+        --(*curr);
+
+      tok_end = *curr;
+
+      /* Step back over the token itself, stopping on the whitespace
+         character that precedes it.  */
+      while (!is_space (**curr))
+        --(*curr);
+
+      tok_start = (*curr) + 1;
+    }
+
+  /* TOK_END is inclusive, hence the '+ 1'.  */
+  return std::string_view (tok_start, tok_end - tok_start + 1);
+}
+
+/* One parsed component of a breakpoint condition string.  A token pairs
+   a type with a std::string_view that points into the original
+   breakpoint condition string; no text is copied.  */
+
+struct token
+{
+  /* The kinds of token that can be produced.  */
+  enum class type
+  {
+    /* Produced for the 'if', 'thread', 'inferior', and 'task' keywords
+       respectively.  For these types m_content is the value passed to
+       the keyword, the keyword itself is not included.  */
+    CONDITION,
+    THREAD,
+    INFERIOR,
+    TASK,
+
+    /* Produced for content that was not understood; m_content is the
+       rest of the input string.  */
+    REST,
+
+    /* Produced for the -force-condition flag; m_content holds the flag
+       text itself.  */
+    FORCE
+  };
+
+  /* Create a token of kind TYPE whose text is CONTENT.  */
+  token (enum type type, std::string_view content)
+    : m_type (type),
+      m_content (content)
+  {
+  }
+
+  /* Return a human readable description of this token.  Only used for
+     debug output.  */
+  std::string to_string () const
+  {
+    /* printf style formatting needs a null-terminated string, which a
+       std::string_view is not, so take a copy up front.  */
+    const std::string text (m_content);
+    const char *value = text.c_str ();
+
+    switch (m_type)
+      {
+      case type::CONDITION:
+        return string_printf ("{ CONDITION: \"%s\" }", value);
+      case type::THREAD:
+        return string_printf ("{ THREAD: \"%s\" }", value);
+      case type::INFERIOR:
+        return string_printf ("{ INFERIOR: \"%s\" }", value);
+      case type::TASK:
+        return string_printf ("{ TASK: \"%s\" }", value);
+      case type::REST:
+        return string_printf ("{ REST: \"%s\" }", value);
+      case type::FORCE:
+        return string_printf ("{ FORCE }");
+      default:
+        return "** unknown **";
+      }
+  }
+
+  /* Return the kind of this token.  */
+  const type &get_type () const
+  {
+    return m_type;
+  }
+
+  /* Return the text of this token, which must not be empty.  */
+  const std::string_view &get_value () const
+  {
+    gdb_assert (m_content.size () > 0);
+    return m_content;
+  }
+
+  /* Grow this token so that it ends where OTHER ends.  This is only
+     meaningful when OTHER follows this token within the same original
+     string; checking that is the caller's responsibility.
+
+     For a keyword/value token such as 'thread 1' the content of OTHER
+     covers only the '1'.  Because the new view spans everything from
+     the start of this token to the end of OTHER, the 'thread' keyword
+     lying between the two is picked up as well.  */
+
+  void
+  extend (const token &other)
+  {
+    const char *first = m_content.data ();
+    const char *last = other.m_content.data () + other.m_content.size ();
+
+    m_content = std::string_view (first, last - first);
+  }
+
+private:
+  /* The kind of this token.  */
+  type m_type;
+
+  /* The significant text of this token.  The extend member function
+     relies on this being a view into the original string.  */
+  std::string_view m_content;
+};
+
+/* Split STR, a breakpoint condition string, into a vector of tokens where
+   each token represents a component of the condition.  STR must not be
+   NULL.  The returned tokens hold views into STR, so STR must outlive
+   them.
+
+   Tokens are first parsed from the front of STR until we encounter an
+   'if' token.  At this point tokens are parsed from the end of STR until
+   we encounter an unknown token, which we assume is the other end of the
+   'if' condition.  If when scanning forward we encounter an unknown token
+   then the remainder of STR is placed into a 'rest' token (the rest of
+   the string), and no backward scan is performed.
+
+   A keyword that is followed by nothing, or by nothing but whitespace,
+   has no value and is treated as unknown content.  */
+
+static std::vector<token>
+parse_all_tokens (const char *str)
+{
+  gdb_assert (str != nullptr);
+
+  std::vector<token> forward_results;
+  std::vector<token> backward_results;
+
+  const char *cond_start = nullptr;
+  const char *cond_end = nullptr;
+  parse_direction direction = parse_direction::forward;
+  std::vector<token> *curr_results = &forward_results;
+  while (*str != '\0')
+    {
+      /* Find the next token.  If moving backward and this token starts at
+         the same location as the condition then we must have found the
+         other end of the condition string -- we're done.  */
+      std::string_view t = find_next_token (&str, direction);
+      if (direction == parse_direction::backward && t.data () <= cond_start)
+        {
+          cond_end = &t.back ();
+          break;
+        }
+
+      /* We only have a single flag option to check for.  All the other
+         options take a value so require an additional token to be found.
+         Additionally, we require that this flag be at least '-f', we
+         don't allow it to be abbreviated to '-'.  */
+      if (t.length () > 1 && startswith ("-force-condition", t))
+        {
+          curr_results->emplace_back (token::type::FORCE, t);
+          continue;
+        }
+
+      /* Maybe the first token was the last token in the string.  If this
+         is the case then we definitely can't try to extract a value
+         token.  This also means that the token T is meaningless.  Reset
+         STR to point at the start of the unknown content and break out of
+         the loop.  We'll record the unknown part of the string outside of
+         the scanning loop (below).
+
+         Trailing whitespace must not count as "more input" here.  If it
+         did then we would go on to extract an empty value token: for a
+         'thread', 'inferior', or 'task' keyword that empty value later
+         trips the assert in token::get_value, and for 'if' the backward
+         scan would start with nothing but the keyword itself to find and
+         run off the front of the string.  */
+      if (direction == parse_direction::forward
+          && *skip_spaces (str) == '\0')
+        {
+          str = t.data ();
+          break;
+        }
+
+      /* As before, find the next token and, if we are scanning backwards,
+         check that we have not reached the start of the condition
+         string.  */
+      std::string_view v = find_next_token (&str, direction);
+      if (direction == parse_direction::backward && v.data () <= cond_start)
+        {
+          /* Use token T here as that must also be part of the condition
+             string.  */
+          cond_end = &t.back ();
+          break;
+        }
+
+      /* When moving backward we will first parse the value token then the
+         keyword token, so swap them now.  */
+      if (direction == parse_direction::backward)
+        std::swap (t, v);
+
+      /* Check for valid option in token T.  If we find a valid option then
+         parse the value from the token V.  Except for 'if', that's handled
+         differently.
+
+         For the 'if' token we need to capture the entire condition
+         string, so record the start of the condition string and then
+         start scanning backwards looking for the end of the condition
+         string.
+
+         The order of these checks is important, at least the check for
+         'thread' must occur before the check for 'task'.  We accept
+         abbreviations of these token names, and 't' should resolve to
+         'thread', which will only happen if we check 'thread' first.  */
+      if (direction == parse_direction::forward && startswith ("if", t))
+        {
+          cond_start = v.data ();
+
+          /* Move STR to the last character of the input, ready for the
+             backward scan.  */
+          str = str + strlen (str);
+          gdb_assert (*str == '\0');
+          --str;
+          direction = parse_direction::backward;
+          curr_results = &backward_results;
+          continue;
+        }
+      else if (startswith ("thread", t))
+        curr_results->emplace_back (token::type::THREAD, v);
+      else if (startswith ("inferior", t))
+        curr_results->emplace_back (token::type::INFERIOR, v);
+      else if (startswith ("task", t))
+        curr_results->emplace_back (token::type::TASK, v);
+      else
+        {
+          /* An unknown token.  If we are scanning forward then reset STR
+             to point at the start of the unknown content, we record this
+             outside of the scanning loop (below).
+
+             If we are scanning backward then unknown content is assumed to
+             be the other end of the condition string, obviously, this is
+             just a heuristic, we could be looking at a mistyped command
+             line, but this will be spotted when the condition is
+             eventually evaluated.
+
+             Either way, no more scanning is required after this.  */
+          if (direction == parse_direction::forward)
+            str = t.data ();
+          else
+            {
+              gdb_assert (direction == parse_direction::backward);
+              cond_end = &v.back ();
+            }
+          break;
+        }
+    }
+
+  if (cond_start != nullptr)
+    {
+      /* If we found the start of a condition string then we should have
+         switched to backward scan mode, and found the end of the condition
+         string.  Capture the whole condition string into a CONDITION token
+         now.  COND_END is inclusive, hence the '+ 1'.  */
+      gdb_assert (direction == parse_direction::backward);
+      gdb_assert (cond_end != nullptr);
+
+      std::string_view v (cond_start, cond_end - cond_start + 1);
+
+      forward_results.emplace_back (token::type::CONDITION, v);
+    }
+  else if (*str != '\0')
+    {
+      /* If we didn't have a condition start pointer then we should still
+         be in forward scanning mode.  If we didn't reach the end of the
+         input string (STR is not at the null character) then the rest of
+         the input string is garbage that we didn't understand.
+
+         Record the unknown content into a REST token.  The caller of this
+         function will report this as an error later on.  We could report
+         the error here, but we prefer to allow the caller to run other
+         checks, and prioritise other errors before reporting this
+         problem.  */
+      gdb_assert (direction == parse_direction::forward);
+      gdb_assert (cond_end == nullptr);
+
+      std::string_view v (str, strlen (str));
+
+      forward_results.emplace_back (token::type::REST, v);
+    }
+
+  /* If we have tokens in the BACKWARD_RESULTS vector then this means that
+     we found an 'if' condition (which will be the last thing in the
+     FORWARD_RESULTS vector), and then we started a backward scan.
+
+     The last tokens from the input string (those after the 'if' condition)
+     will be the first tokens added to the BACKWARD_RESULTS vector, so the
+     last items in the BACKWARD_RESULTS vector are those next to the 'if'
+     condition.
+
+     Check the tokens in the BACKWARD_RESULTS vector from back to front.
+     If the tokens look invalid then we assume that they are actually part
+     of the 'if' condition, and merge the token with the 'if' condition.
+     If it turns out that this was incorrect and that instead the user just
+     messed up entering the token value, then this will show as an error
+     when parsing the 'if' condition.
+
+     Doing this allows us to handle things like:
+
+       break function if ( variable == thread )
+
+     Where 'thread' is a local variable within 'function'.  When parsing
+     this we will initially see 'thread )' as a thread token with ')' as
+     the value.  However, the following code will spot that ')' is not a
+     valid thread-id, and so we merge 'thread )' into the 'if' condition
+     string.
+
+     This code also handles the special treatment for '-force-condition',
+     which exists for backwards compatibility reasons.  Traditionally this
+     flag, if it occurred immediately after the 'if' condition, would be
+     treated as part of the 'if' condition.  When the breakpoint condition
+     parsing code was rewritten, this behavior was retained.  */
+  gdb_assert (backward_results.empty ()
+              || (forward_results.back ().get_type ()
+                  == token::type::CONDITION));
+  while (!backward_results.empty ())
+    {
+      token &t = backward_results.back ();
+
+      if (t.get_type () == token::type::FORCE)
+        forward_results.back ().extend (std::move (t));
+      else if (t.get_type () == token::type::THREAD)
+        {
+          /* A value that is_thread_id accepts in full is a real thread
+             token, stop merging.  */
+          const char *end;
+          std::string v (t.get_value ());
+          if (is_thread_id (v.c_str (), &end) && *end == '\0')
+            break;
+          forward_results.back ().extend (std::move (t));
+        }
+      else if (t.get_type () == token::type::INFERIOR
+               || t.get_type () == token::type::TASK)
+        {
+          /* Place the token's value into a null-terminated string, parse
+             the string as a number and check that the entire string was
+             parsed.  If this is true then this looks like a valid inferior
+             or task number, otherwise, assume an invalid id, and merge
+             this token with the 'if' token.  */
+          char *end;
+          std::string v (t.get_value ());
+          (void) strtol (v.c_str (), &end, 0);
+          if (end > v.c_str () && *end == '\0')
+            break;
+          forward_results.back ().extend (std::move (t));
+        }
+      else
+        gdb_assert_not_reached ("unexpected token type");
+
+      /* If we found an actual valid token above then we will have broken
+         out of the loop.  We only get here if the token was merged with
+         the 'if' condition, in which case we can discard the last token
+         and then check the token before that.  */
+      backward_results.pop_back ();
+    }
+
+  /* If after the above checks we still have some tokens in the
+     BACKWARD_RESULTS vector, then these need to be appended to the
+     FORWARD_RESULTS vector.  However, we first reverse the order so that
+     FORWARD_RESULTS retains the tokens in the order they appeared in the
+     input string.  */
+  if (!backward_results.empty ())
+    forward_results.insert (forward_results.end (),
+                            backward_results.rbegin (),
+                            backward_results.rend ());
+
+  return forward_results;
+}
+
+/* Print the tokens in VEC to the breakpoint debug output as a single
+   line.  Must only be called when the global debug_breakpoint is
+   true.  */
+
+static void
+dump_condition_tokens (const std::vector<token> &vec)
+{
+  gdb_assert (debug_breakpoint);
+
+  std::string text = "Tokens: ";
+
+  /* Nothing goes before the first token, a single space goes before
+     every later one.  */
+  const char *separator = "";
+  for (const token &tok : vec)
+    {
+      text += separator;
+      text += tok.to_string ();
+      separator = " ";
+    }
+
+  breakpoint_debug_printf ("%s", text.c_str ());
+}
+
+/* See break-cond-parse.h.
+
+   All of the output arguments are first reset to their defaults (no
+   condition, no rest, -1 for thread, inferior and task, and false for
+   force).  If STR is NULL nothing more is done.  Otherwise STR is split
+   into tokens and each token is validated; the results are only copied
+   into the output arguments once every token has been accepted, so if
+   an error is thrown the outputs are left at their defaults.  */
+
+void
+create_breakpoint_parse_arg_string
+  (const char *str, gdb::unique_xmalloc_ptr<char> *cond_string_ptr,
+   int *thread_ptr, int *inferior_ptr, int *task_ptr,
+   gdb::unique_xmalloc_ptr<char> *rest_ptr, bool *force_ptr)
+{
+  /* Set up the defaults.  */
+  cond_string_ptr->reset ();
+  rest_ptr->reset ();
+  *thread_ptr = -1;
+  *inferior_ptr = -1;
+  *task_ptr = -1;
+  *force_ptr = false;
+
+  if (str == nullptr)
+    return;
+
+  /* Split STR into a series of tokens.  */
+  std::vector<token> tokens = parse_all_tokens (str);
+  if (debug_breakpoint)
+    dump_condition_tokens (tokens);
+
+  /* Temporary variables.  Initialised to the default state, then updated
+     as we parse TOKENS.  If all of TOKENS is parsed successfully then the
+     state from these variables is copied into the output arguments before
+     the function returns.  */
+  int thread = -1, inferior = -1, task = -1;
+  bool force = false;
+  gdb::unique_xmalloc_ptr<char> cond_string, rest;
+
+  for (const token &t : tokens)
+    {
+      /* The C parsing functions used below need a null-terminated
+         string, which the token's view is not, so take a copy.  */
+      std::string tok_value (t.get_value ());
+      switch (t.get_type ())
+        {
+        case token::type::FORCE:
+          force = true;
+          break;
+        case token::type::THREAD:
+          {
+            if (thread != -1)
+              error ("You can specify only one thread.");
+            if (task != -1 || inferior != -1)
+              error ("You can specify only one of thread, inferior, or task.");
+            const char *tmptok;
+            thread_info *thr = parse_thread_id (tok_value.c_str (), &tmptok);
+            gdb_assert (*tmptok == '\0');
+            thread = thr->global_num;
+          }
+          break;
+        case token::type::INFERIOR:
+          {
+            if (inferior != -1)
+              error ("You can specify only one inferior.");
+            if (task != -1 || thread != -1)
+              error ("You can specify only one of thread, inferior, or task.");
+            char *tmptok;
+            long inferior_id = strtol (tok_value.c_str (), &tmptok, 0);
+            if (*tmptok != '\0')
+              error (_("Junk '%s' after inferior keyword."), tmptok);
+            /* Reject anything that does not fit in an int, in either
+               direction.  Narrowing an out of range value could
+               otherwise wrap around onto the number of an inferior that
+               really exists.  */
+            if (inferior_id < INT_MIN || inferior_id > INT_MAX)
+              error (_("No inferior number '%ld'"), inferior_id);
+            inferior = static_cast<int> (inferior_id);
+            struct inferior *inf = find_inferior_id (inferior);
+            if (inf == nullptr)
+              error (_("No inferior number '%d'"), inferior);
+          }
+          break;
+        case token::type::TASK:
+          {
+            if (task != -1)
+              error ("You can specify only one task.");
+            if (inferior != -1 || thread != -1)
+              error ("You can specify only one of thread, inferior, or task.");
+            char *tmptok;
+            long task_id = strtol (tok_value.c_str (), &tmptok, 0);
+            if (*tmptok != '\0')
+              error (_("Junk '%s' after task keyword."), tmptok);
+            /* As for inferiors, never narrow an out of range value.  */
+            if (task_id < INT_MIN || task_id > INT_MAX)
+              error (_("Unknown task %ld"), task_id);
+            task = static_cast<int> (task_id);
+            if (!valid_task_id (task))
+              error (_("Unknown task %d."), task);
+          }
+          break;
+        case token::type::CONDITION:
+          cond_string.reset (savestring (t.get_value ().data (),
+                                         t.get_value ().size ()));
+          break;
+        case token::type::REST:
+          rest.reset (savestring (t.get_value ().data (),
+                                  t.get_value ().size ()));
+          break;
+        }
+    }
+
+  /* Move results into the output locations.  */
+  *force_ptr = force;
+  *thread_ptr = thread;
+  *inferior_ptr = inferior;
+  *task_ptr = task;
+  rest_ptr->reset (rest.release ());
+  cond_string_ptr->reset (cond_string.release ());
+}
+
+#if GDB_SELF_TEST
+
+namespace selftests {
+
+/* Run a single test of the create_breakpoint_parse_arg_string function.
+   INPUT is passed to create_breakpoint_parse_arg_string while all other
+   arguments are the expected output from
+   create_breakpoint_parse_arg_string.
+
+   CONDITION and REST may be NULL, meaning that no condition or rest
+   string is expected.  ERROR_MSG is the expected error message; NULL
+   means that no error is expected.  Any mismatch fails the self test,
+   and, in verbose mode, the values that were actually extracted are
+   printed first.  */
+
+static void
+test (const char *input, const char *condition, int thread = -1,
+      int inferior = -1, int task = -1, bool force = false,
+      const char *rest = nullptr, const char *error_msg = nullptr)
+{
+  gdb::unique_xmalloc_ptr<char> extracted_condition;
+  gdb::unique_xmalloc_ptr<char> extracted_rest;
+  int extracted_thread, extracted_inferior, extracted_task;
+  bool extracted_force_condition;
+  std::string exception_msg;
+
+  /* An empty string stands for "no error", this compares equal to an
+     EXCEPTION_MSG that was never assigned.  */
+  if (error_msg == nullptr)
+    error_msg = "";
+
+  try
+    {
+      create_breakpoint_parse_arg_string (input, &extracted_condition,
+                                          &extracted_thread,
+                                          &extracted_inferior,
+                                          &extracted_task, &extracted_rest,
+                                          &extracted_force_condition);
+    }
+  catch (const gdb_exception_error &ex)
+    {
+      exception_msg = ex.what ();
+    }
+
+  if ((condition == nullptr) != (extracted_condition.get () == nullptr)
+      || (condition != nullptr
+          && strcmp (condition, extracted_condition.get ()) != 0)
+      || (rest == nullptr) != (extracted_rest.get () == nullptr)
+      || (rest != nullptr && strcmp (rest, extracted_rest.get ()) != 0)
+      || thread != extracted_thread
+      || inferior != extracted_inferior
+      || task != extracted_task
+      || force != extracted_force_condition
+      || exception_msg != error_msg)
+    {
+      if (run_verbose ())
+        {
+          /* The extracted strings are NULL whenever nothing was found,
+             and passing a null pointer for a '%s' conversion is
+             undefined behavior, so substitute some visible text.  */
+          auto printable = [] (const char *s)
+            {
+              return s != nullptr ? s : "(null)";
+            };
+
+          debug_printf ("input: '%s'\n", printable (input));
+          debug_printf ("condition: '%s'\n",
+                        printable (extracted_condition.get ()));
+          debug_printf ("rest: '%s'\n", printable (extracted_rest.get ()));
+          debug_printf ("thread: %d\n", extracted_thread);
+          debug_printf ("inferior: %d\n", extracted_inferior);
+          debug_printf ("task: %d\n", extracted_task);
+          debug_printf ("forced: %s\n",
+                        extracted_force_condition ? "true" : "false");
+          debug_printf ("exception: '%s'\n", exception_msg.c_str ());
+        }
+
+      /* Report the failure.  */
+      SELF_CHECK (false);
+    }
+}
+
+/* Wrapper for test function. Pass through the default values for all
+ parameters, except the last parameter, which indicates that we expect
+ INPUT to trigger an error. */
+
+static void
+test_error (const char *input, const char *error_msg)
+{
+ test (input, nullptr, -1, -1, -1, false, nullptr, error_msg);
+}
+
+/* Test the create_breakpoint_parse_arg_string function. Just wraps
+ multiple calls to the test function above.
+
+ The tests run against a mock target, and the global number of the mock
+ thread is what a successfully parsed 'thread 1' is expected to
+ yield. */
+
+static void
+create_breakpoint_parse_arg_string_tests ()
+{
+ gdbarch *arch = current_inferior ()->arch ();
+ scoped_restore_current_pspace_and_thread restore;
+ scoped_mock_context<test_target_ops> mock_target (arch);
+
+ int global_thread_num = mock_target.mock_thread.global_num;
+
+ /* Test parsing valid breakpoint condition strings. */
+
+ /* Whitespace around the keywords and the condition is not part of the
+ extracted condition. */
+ test (" if blah ", "blah");
+ test (" if blah thread 1", "blah", global_thread_num);
+ test (" if blah inferior 1", "blah", -1, 1);
+ test (" if blah thread 1 ", "blah", global_thread_num);
+
+ /* Without an 'if', content that is not understood is returned as the
+ rest string. */
+ test ("thread 1 woof", nullptr, global_thread_num, -1, -1, false, "woof");
+ test ("thread 1 X", nullptr, global_thread_num, -1, -1, false, "X");
+
+ /* The -force-condition flag is recognised before the 'if', between
+ other keywords, and at the very end of the string. */
+ test (" if blah thread 1 -force-condition", "blah", global_thread_num,
+ -1, -1, true);
+ test (" -force-condition if blah thread 1", "blah", global_thread_num,
+ -1, -1, true);
+ test (" -force-condition if blah thread 1 ", "blah", global_thread_num,
+ -1, -1, true);
+ test ("thread 1 -force-condition if blah", "blah", global_thread_num,
+ -1, -1, true);
+
+ /* A keyword name appearing inside the condition stays part of the
+ condition when what follows it is not a valid value. */
+ test ("if (A::outer::func ())", "(A::outer::func ())");
+ test ("if ( foo == thread )", "( foo == thread )");
+ test ("if ( foo == thread ) inferior 1", "( foo == thread )", -1, 1);
+ test ("if ( foo == thread ) thread 1", "( foo == thread )",
+ global_thread_num);
+ test ("if foo == thread", "foo == thread");
+
+ /* Here 'thread 1' is a valid keyword and value, so it is taken as the
+ thread and only 'foo ==' is left as the condition. */
+ test ("if foo == thread 1", "foo ==", global_thread_num);
+
+ /* Test parsing some invalid breakpoint condition strings. */
+
+ /* At most one of thread, inferior, and task may be given, and each at
+ most once. */
+ test_error ("thread 1 if foo == 123 thread 1",
+ "You can specify only one thread.");
+ test_error ("thread 1 if foo == 123 inferior 1",
+ "You can specify only one of thread, inferior, or task.");
+ test_error ("thread 1 if foo == 123 task 1",
+ "You can specify only one of thread, inferior, or task.");
+ test_error ("inferior 1 if foo == 123 inferior 1",
+ "You can specify only one inferior.");
+ test_error ("inferior 1 if foo == 123 thread 1",
+ "You can specify only one of thread, inferior, or task.");
+ test_error ("inferior 1 if foo == 123 task 1",
+ "You can specify only one of thread, inferior, or task.");
+
+ /* Malformed values given to a keyword. */
+ test_error ("thread 1.2.3", "Invalid thread ID: 1.2.3");
+ test_error ("thread 1/2", "Invalid thread ID: 1/2");
+ test_error ("thread 1xxx", "Invalid thread ID: 1xxx");
+ test_error ("inferior 1xxx", "Junk 'xxx' after inferior keyword.");
+ test_error ("task 1xxx", "Junk 'xxx' after task keyword.");
+}
+
+} /* namespace selftests */
+#endif /* GDB_SELF_TEST */
+
+/* Initialisation for this file. The only thing done here is to register
+ the create_breakpoint_parse_arg_string self test, and only when GDB is
+ built with GDB_SELF_TEST defined; otherwise the body is empty.
+ NOTE(review): INIT_GDB_FILE is defined elsewhere, presumably it
+ declares the function that GDB calls at startup -- confirm. */
+
+INIT_GDB_FILE (break_cond_parse)
+{
+#if GDB_SELF_TEST
+ selftests::register_test
+ ("create_breakpoint_parse_arg_string",
+ selftests::create_breakpoint_parse_arg_string_tests);
+#endif
+}