diff options
Diffstat (limited to 'gdb/break-cond-parse.c')
-rw-r--r-- | gdb/break-cond-parse.c | 694 |
1 files changed, 694 insertions, 0 deletions
diff --git a/gdb/break-cond-parse.c b/gdb/break-cond-parse.c new file mode 100644 index 0000000..04a8895 --- /dev/null +++ b/gdb/break-cond-parse.c @@ -0,0 +1,694 @@ +/* Copyright (C) 2023-2025 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "defs.h" +#include "gdbsupport/gdb_assert.h" +#include "gdbsupport/selftest.h" +#include "test-target.h" +#include "scoped-mock-context.h" +#include "break-cond-parse.h" +#include "tid-parse.h" +#include "ada-lang.h" +#include "exceptions.h" + +/* When parsing tokens from a string, which direction are we parsing? + + Given the following string and pointer 'ptr': + + ABC DEF GHI JKL + ^ + ptr + + Parsing 'forward' will return the token 'GHI' and update 'ptr' to point + between GHI and JKL. Parsing 'backward' will return the token 'DEF' and + update 'ptr' to point between ABC and DEF. +*/ + +enum class parse_direction +{ + /* Parse the next token forwards. */ + forward, + + /* Parse the previous token backwards. */ + backward +}; + +/* Find the next token in DIRECTION from *CURR. */ + +static std::string_view +find_next_token (const char **curr, parse_direction direction) +{ + const char *tok_start, *tok_end; + + gdb_assert (**curr != '\0'); + + if (direction == parse_direction::forward) + { + *curr = skip_spaces (*curr); + tok_start = *curr; + *curr = skip_to_space (*curr); + tok_end = *curr - 1; + } + else + { + gdb_assert (direction == parse_direction::backward); + + while (isspace (**curr)) + --(*curr); + + tok_end = *curr; + + while (!isspace (**curr)) + --(*curr); + + tok_start = (*curr) + 1; + } + + return std::string_view (tok_start, tok_end - tok_start + 1); +} + +/* A class that represents a complete parsed token. Each token has a type + and a std::string_view into the original breakpoint condition string. */ + +struct token +{ + /* The types a token might take. */ + enum class type + { + /* These are the token types for the 'if', 'thread', 'inferior', and + 'task' keywords. The m_content for these token types is the value + passed to the keyword, not the keyword itself. */ + CONDITION, + THREAD, + INFERIOR, + TASK, + + /* This is the token used when we find unknown content, the m_content + for this token is the rest of the input string. */ + REST, + + /* This is the token for the -force-condition token, the m_content for + this token contains the keyword itself. */ + FORCE + }; + + token (enum type type, std::string_view content) + : m_type (type), + m_content (std::move (content)) + { + /* Nothing. */ + } + + /* Return a string representing this token. Only used for debug. */ + std::string to_string () const + { + switch (m_type) + { + case type::CONDITION: + return string_printf ("{ CONDITION: \"%s\" }", + std::string (m_content).c_str ()); + case type::THREAD: + return string_printf ("{ THREAD: \"%s\" }", + std::string (m_content).c_str ()); + case type::INFERIOR: + return string_printf ("{ INFERIOR: \"%s\" }", + std::string (m_content).c_str ()); + case type::TASK: + return string_printf ("{ TASK: \"%s\" }", + std::string (m_content).c_str ()); + case type::REST: + return string_printf ("{ REST: \"%s\" }", + std::string (m_content).c_str ()); + case type::FORCE: + return string_printf ("{ FORCE }"); + default: + return "** unknown **"; + } + } + + /* The type of this token. */ + const type &get_type () const + { + return m_type; + } + + /* Return the value of this token. */ + const std::string_view &get_value () const + { + gdb_assert (m_content.size () > 0); + return m_content; + } + + /* Extend this token with the contents of OTHER. This only makes sense + if OTHER is the next token after this one in the original string, + however, enforcing that restriction is left to the caller of this + function. + + When OTHER is a keyword/value token, e.g. 'thread 1', the m_content + for OTHER will only point to the '1'. However, as the m_content is a + std::string_view, then when we merge the m_content of OTHER into this + token we automatically merge in the 'thread' part too, as it + naturally sits between this token and OTHER. */ + + void + extend (const token &other) + { + m_content = std::string_view (this->m_content.data (), + (other.m_content.data () + - this->m_content.data () + + other.m_content.size ())); + } + +private: + /* The type of this token. */ + type m_type; + + /* The important content part of this token. The extend member function + depends on this being a std::string_view. */ + std::string_view m_content; +}; + +/* Split STR, a breakpoint condition string, into a vector of tokens where + each token represents a component of the condition. Tokens are first + parsed from the front of STR until we encounter an 'if' token. At this + point tokens are parsed from the end of STR until we encounter an + unknown token, which we assume is the other end of the 'if' condition. + If when scanning forward we encounter an unknown token then the + remainder of STR is placed into a 'rest' token (the rest of the + string), and no backward scan is performed. */ + +static std::vector<token> +parse_all_tokens (const char *str) +{ + gdb_assert (str != nullptr); + + std::vector<token> forward_results; + std::vector<token> backward_results; + + const char *cond_start = nullptr; + const char *cond_end = nullptr; + parse_direction direction = parse_direction::forward; + std::vector<token> *curr_results = &forward_results; + while (*str != '\0') + { + /* Find the next token. If moving backward and this token starts at + the same location as the condition then we must have found the + other end of the condition string -- we're done. */ + std::string_view t = find_next_token (&str, direction); + if (direction == parse_direction::backward && t.data () <= cond_start) + { + cond_end = &t.back (); + break; + } + + /* We only have a single flag option to check for. All the other + options take a value so require an additional token to be found. + Additionally, we require that this flag be at least '-f', we + don't allow it to be abbreviated to '-'. */ + if (t.length () > 1 && startswith ("-force-condition", t)) + { + curr_results->emplace_back (token::type::FORCE, t); + continue; + } + + /* Maybe the first token was the last token in the string. If this + is the case then we definitely can't try to extract a value + token. This also means that the token T is meaningless. Reset + TOK to point at the start of the unknown content and break out of + the loop. We'll record the unknown part of the string outside of + the scanning loop (below). */ + if (direction == parse_direction::forward && *str == '\0') + { + str = t.data (); + break; + } + + /* As before, find the next token and, if we are scanning backwards, + check that we have not reached the start of the condition string. */ + std::string_view v = find_next_token (&str, direction); + if (direction == parse_direction::backward && v.data () <= cond_start) + { + /* Use token T here as that must also be part of the condition + string. */ + cond_end = &t.back (); + break; + } + + /* When moving backward we will first parse the value token then the + keyword token, so swap them now. */ + if (direction == parse_direction::backward) + std::swap (t, v); + + /* Check for valid option in token T. If we find a valid option then + parse the value from the token V. Except for 'if', that's handled + differently. + + For the 'if' token we need to capture the entire condition + string, so record the start of the condition string and then + start scanning backwards looking for the end of the condition + string. + + The order of these checks is important, at least the check for + 'thread' must occur before the check for 'task'. We accept + abbreviations of these token names, and 't' should resolve to + 'thread', which will only happen if we check 'thread' first. */ + if (direction == parse_direction::forward && startswith ("if", t)) + { + cond_start = v.data (); + str = str + strlen (str); + gdb_assert (*str == '\0'); + --str; + direction = parse_direction::backward; + curr_results = &backward_results; + continue; + } + else if (startswith ("thread", t)) + curr_results->emplace_back (token::type::THREAD, v); + else if (startswith ("inferior", t)) + curr_results->emplace_back (token::type::INFERIOR, v); + else if (startswith ("task", t)) + curr_results->emplace_back (token::type::TASK, v); + else + { + /* An unknown token. If we are scanning forward then reset TOK + to point at the start of the unknown content, we record this + outside of the scanning loop (below). + + If we are scanning backward then unknown content is assumed to + be the other end of the condition string, obviously, this is + just a heuristic, we could be looking at a mistyped command + line, but this will be spotted when the condition is + eventually evaluated. + + Either way, no more scanning is required after this. */ + if (direction == parse_direction::forward) + str = t.data (); + else + { + gdb_assert (direction == parse_direction::backward); + cond_end = &v.back (); + } + break; + } + } + + if (cond_start != nullptr) + { + /* If we found the start of a condition string then we should have + switched to backward scan mode, and found the end of the condition + string. Capture the whole condition string into COND_STRING + now. */ + gdb_assert (direction == parse_direction::backward); + gdb_assert (cond_end != nullptr); + + std::string_view v (cond_start, cond_end - cond_start + 1); + + forward_results.emplace_back (token::type::CONDITION, v); + } + else if (*str != '\0') + { + /* If we didn't have a condition start pointer then we should still + be in forward scanning mode. If we didn't reach the end of the + input string (TOK is not at the null character) then the rest of + the input string is garbage that we didn't understand. + + Record the unknown content into REST. The caller of this function + will report this as an error later on. We could report the error + here, but we prefer to allow the caller to run other checks, and + prioritise other errors before reporting this problem. */ + gdb_assert (direction == parse_direction::forward); + gdb_assert (cond_end == nullptr); + + std::string_view v (str, strlen (str)); + + forward_results.emplace_back (token::type::REST, v); + } + + /* If we have tokens in the BACKWARD_RESULTS vector then this means that + we found an 'if' condition (which will be the last thing in the + FORWARD_RESULTS vector), and then we started a backward scan. + + The last tokens from the input string (those after the 'if' condition) + will be the first tokens added to the BACKWARD_RESULTS vector, so the + last items in the BACKWARD_RESULTS vector are those next to the 'if' + condition. + + Check the tokens in the BACKWARD_RESULTS vector from back to front. + If the tokens look invalid then we assume that they are actually part + of the 'if' condition, and merge the token with the 'if' condition. + If it turns out that this was incorrect and that instead the user just + messed up entering the token value, then this will show as an error + when parsing the 'if' condition. + + Doing this allows us to handle things like: + + break function if ( variable == thread ) + + Where 'thread' is a local variable within 'function'. When parsing + this we will initially see 'thread )' as a thread token with ')' as + the value. However, the following code will spot that ')' is not a + valid thread-id, and so we merge 'thread )' into the 'if' condition + string. + + This code also handles the special treatment for '-force-condition', + which exists for backwards compatibility reasons. Traditionally this + flag, if it occurred immediately after the 'if' condition, would be + treated as part of the 'if' condition. When the breakpoint condition + parsing code was rewritten, this behavior was retained. */ + gdb_assert (backward_results.empty () + || (forward_results.back ().get_type () + == token::type::CONDITION)); + while (!backward_results.empty ()) + { + token &t = backward_results.back (); + + if (t.get_type () == token::type::FORCE) + forward_results.back ().extend (std::move (t)); + else if (t.get_type () == token::type::THREAD) + { + const char *end; + std::string v (t.get_value ()); + if (is_thread_id (v.c_str (), &end) && *end == '\0') + break; + forward_results.back ().extend (std::move (t)); + } + else if (t.get_type () == token::type::INFERIOR + || t.get_type () == token::type::TASK) + { + /* Place the token's value into a null-terminated string, parse + the string as a number and check that the entire string was + parsed. If this is true then this looks like a valid inferior + or task number, otherwise, assume an invalid id, and merge + this token with the 'if' token. */ + char *end; + std::string v (t.get_value ()); + (void) strtol (v.c_str (), &end, 0); + if (end > v.c_str () && *end == '\0') + break; + forward_results.back ().extend (std::move (t)); + } + else + gdb_assert_not_reached ("unexpected token type"); + + /* If we found an actual valid token above then we will have broken + out of the loop. We only get here if the token was merged with + the 'if' condition, in which case we can discard the last token + and then check the token before that. */ + backward_results.pop_back (); + } + + /* If after the above checks we still have some tokens in the + BACKWARD_RESULTS vector, then these need to be appended to the + FORWARD_RESULTS vector. However, we first reverse the order so that + FORWARD_RESULTS retains the tokens in the order they appeared in the + input string. */ + if (!backward_results.empty ()) + forward_results.insert (forward_results.end (), + backward_results.rbegin (), + backward_results.rend ()); + + return forward_results; +} + +/* Called when the global debug_breakpoint is true. Prints VEC to the + debug output stream. */ + +static void +dump_condition_tokens (const std::vector<token> &vec) +{ + gdb_assert (debug_breakpoint); + + bool first = true; + std::string str = "Tokens: "; + for (const token &t : vec) + { + if (!first) + str += " "; + first = false; + str += t.to_string (); + } + breakpoint_debug_printf ("%s", str.c_str ()); +} + +/* See break-cond-parse.h. */ + +void +create_breakpoint_parse_arg_string + (const char *str, gdb::unique_xmalloc_ptr<char> *cond_string_ptr, + int *thread_ptr, int *inferior_ptr, int *task_ptr, + gdb::unique_xmalloc_ptr<char> *rest_ptr, bool *force_ptr) +{ + /* Set up the defaults. */ + cond_string_ptr->reset (); + rest_ptr->reset (); + *thread_ptr = -1; + *inferior_ptr = -1; + *task_ptr = -1; + *force_ptr = false; + + if (str == nullptr) + return; + + /* Split STR into a series of tokens. */ + std::vector<token> tokens = parse_all_tokens (str); + if (debug_breakpoint) + dump_condition_tokens (tokens); + + /* Temporary variables. Initialised to the default state, then updated + as we parse TOKENS. If all of TOKENS is parsed successfully then the + state from these variables is copied into the output arguments before + the function returns. */ + int thread = -1, inferior = -1, task = -1; + bool force = false; + gdb::unique_xmalloc_ptr<char> cond_string, rest; + + for (const token &t : tokens) + { + std::string tok_value (t.get_value ()); + switch (t.get_type ()) + { + case token::type::FORCE: + force = true; + break; + case token::type::THREAD: + { + if (thread != -1) + error ("You can specify only one thread."); + if (task != -1 || inferior != -1) + error ("You can specify only one of thread, inferior, or task."); + const char *tmptok; + thread_info *thr = parse_thread_id (tok_value.c_str (), &tmptok); + gdb_assert (*tmptok == '\0'); + thread = thr->global_num; + } + break; + case token::type::INFERIOR: + { + if (inferior != -1) + error ("You can specify only one inferior."); + if (task != -1 || thread != -1) + error ("You can specify only one of thread, inferior, or task."); + char *tmptok; + long inferior_id = strtol (tok_value.c_str (), &tmptok, 0); + if (*tmptok != '\0') + error (_("Junk '%s' after inferior keyword."), tmptok); + if (inferior_id > INT_MAX) + error (_("No inferior number '%ld'"), inferior_id); + inferior = static_cast<int> (inferior_id); + struct inferior *inf = find_inferior_id (inferior); + if (inf == nullptr) + error (_("No inferior number '%d'"), inferior); + } + break; + case token::type::TASK: + { + if (task != -1) + error ("You can specify only one task."); + if (inferior != -1 || thread != -1) + error ("You can specify only one of thread, inferior, or task."); + char *tmptok; + long task_id = strtol (tok_value.c_str (), &tmptok, 0); + if (*tmptok != '\0') + error (_("Junk '%s' after task keyword."), tmptok); + if (task_id > INT_MAX) + error (_("Unknown task %ld"), task_id); + task = static_cast<int> (task_id); + if (!valid_task_id (task)) + error (_("Unknown task %d."), task); + } + break; + case token::type::CONDITION: + cond_string.reset (savestring (t.get_value ().data (), + t.get_value ().size ())); + break; + case token::type::REST: + rest.reset (savestring (t.get_value ().data (), + t.get_value ().size ())); + break; + } + } + + /* Move results into the output locations. */ + *force_ptr = force; + *thread_ptr = thread; + *inferior_ptr = inferior; + *task_ptr = task; + rest_ptr->reset (rest.release ()); + cond_string_ptr->reset (cond_string.release ()); +} + +#if GDB_SELF_TEST + +namespace selftests { + +/* Run a single test of the create_breakpoint_parse_arg_string function. + INPUT is passed to create_breakpoint_parse_arg_string while all other + arguments are the expected output from + create_breakpoint_parse_arg_string. */ + +static void +test (const char *input, const char *condition, int thread = -1, + int inferior = -1, int task = -1, bool force = false, + const char *rest = nullptr, const char *error_msg = nullptr) +{ + gdb::unique_xmalloc_ptr<char> extracted_condition; + gdb::unique_xmalloc_ptr<char> extracted_rest; + int extracted_thread, extracted_inferior, extracted_task; + bool extracted_force_condition; + std::string exception_msg; + + if (error_msg == nullptr) + error_msg = ""; + + try + { + create_breakpoint_parse_arg_string (input, &extracted_condition, + &extracted_thread, + &extracted_inferior, + &extracted_task, &extracted_rest, + &extracted_force_condition); + } + catch (const gdb_exception_error &ex) + { + exception_msg = ex.what (); + } + + if ((condition == nullptr) != (extracted_condition.get () == nullptr) + || (condition != nullptr + && strcmp (condition, extracted_condition.get ()) != 0) + || (rest == nullptr) != (extracted_rest.get () == nullptr) + || (rest != nullptr && strcmp (rest, extracted_rest.get ()) != 0) + || thread != extracted_thread + || inferior != extracted_inferior + || task != extracted_task + || force != extracted_force_condition + || exception_msg != error_msg) + { + if (run_verbose ()) + { + debug_printf ("input: '%s'\n", input); + debug_printf ("condition: '%s'\n", extracted_condition.get ()); + debug_printf ("rest: '%s'\n", extracted_rest.get ()); + debug_printf ("thread: %d\n", extracted_thread); + debug_printf ("inferior: %d\n", extracted_inferior); + debug_printf ("task: %d\n", extracted_task); + debug_printf ("forced: %s\n", + extracted_force_condition ? "true" : "false"); + debug_printf ("exception: '%s'\n", exception_msg.c_str ()); + } + + /* Report the failure. */ + SELF_CHECK (false); + } +} + +/* Wrapper for test function. Pass through the default values for all + parameters, except the last parameter, which indicates that we expect + INPUT to trigger an error. */ + +static void +test_error (const char *input, const char *error_msg) +{ + test (input, nullptr, -1, -1, -1, false, nullptr, error_msg); +} + +/* Test the create_breakpoint_parse_arg_string function. Just wraps + multiple calls to the test function above. */ + +static void +create_breakpoint_parse_arg_string_tests () +{ + gdbarch *arch = current_inferior ()->arch (); + scoped_restore_current_pspace_and_thread restore; + scoped_mock_context<test_target_ops> mock_target (arch); + + int global_thread_num = mock_target.mock_thread.global_num; + + /* Test parsing valid breakpoint condition strings. */ + test (" if blah ", "blah"); + test (" if blah thread 1", "blah", global_thread_num); + test (" if blah inferior 1", "blah", -1, 1); + test (" if blah thread 1 ", "blah", global_thread_num); + test ("thread 1 woof", nullptr, global_thread_num, -1, -1, false, "woof"); + test ("thread 1 X", nullptr, global_thread_num, -1, -1, false, "X"); + test (" if blah thread 1 -force-condition", "blah", global_thread_num, + -1, -1, true); + test (" -force-condition if blah thread 1", "blah", global_thread_num, + -1, -1, true); + test (" -force-condition if blah thread 1 ", "blah", global_thread_num, + -1, -1, true); + test ("thread 1 -force-condition if blah", "blah", global_thread_num, + -1, -1, true); + test ("if (A::outer::func ())", "(A::outer::func ())"); + test ("if ( foo == thread )", "( foo == thread )"); + test ("if ( foo == thread ) inferior 1", "( foo == thread )", -1, 1); + test ("if ( foo == thread ) thread 1", "( foo == thread )", + global_thread_num); + test ("if foo == thread", "foo == thread"); + test ("if foo == thread 1", "foo ==", global_thread_num); + + /* Test parsing some invalid breakpoint condition strings. */ + test_error ("thread 1 if foo == 123 thread 1", + "You can specify only one thread."); + test_error ("thread 1 if foo == 123 inferior 1", + "You can specify only one of thread, inferior, or task."); + test_error ("thread 1 if foo == 123 task 1", + "You can specify only one of thread, inferior, or task."); + test_error ("inferior 1 if foo == 123 inferior 1", + "You can specify only one inferior."); + test_error ("inferior 1 if foo == 123 thread 1", + "You can specify only one of thread, inferior, or task."); + test_error ("inferior 1 if foo == 123 task 1", + "You can specify only one of thread, inferior, or task."); + test_error ("thread 1.2.3", "Invalid thread ID: 1.2.3"); + test_error ("thread 1/2", "Invalid thread ID: 1/2"); + test_error ("thread 1xxx", "Invalid thread ID: 1xxx"); + test_error ("inferior 1xxx", "Junk 'xxx' after inferior keyword."); + test_error ("task 1xxx", "Junk 'xxx' after task keyword."); +} + +} /* namespace selftests */ +#endif /* GDB_SELF_TEST */ + +INIT_GDB_FILE (break_cond_parse) +{ +#if GDB_SELF_TEST + selftests::register_test + ("create_breakpoint_parse_arg_string", + selftests::create_breakpoint_parse_arg_string_tests); +#endif +} |