From a4b7c5f5cda16795dd8be11494e8f1b5de21d69f Mon Sep 17 00:00:00 2001 From: Tom Tromey Date: Fri, 19 Apr 2024 20:22:11 -0600 Subject: Implement C++14 numeric separators C++14 allows the use of the apostrophe as a numeric separator; that is, "23000" and "23'000" represent the same number. This patch implements this for gdb's C++ parser and the C++ name canonicalizer. I did this unconditionally for all C variants because I think it's unambiguous. For the name canonicalizer, there's at least one compiler that can emit constants with this form, see bug 30845. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=23457 Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30845 Approved-By: John Baldwin --- gdb/c-exp.y | 28 +++++++++++++++++++++++----- gdb/cp-name-parser.y | 31 ++++++++++++++++++++++++++----- gdb/testsuite/gdb.cp/misc.exp | 4 ++++ 3 files changed, 53 insertions(+), 10 deletions(-) (limited to 'gdb') diff --git a/gdb/c-exp.y b/gdb/c-exp.y index 5db6355..6022317 100644 --- a/gdb/c-exp.y +++ b/gdb/c-exp.y @@ -2755,6 +2755,10 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name) hex = 0; } + /* If the token includes the C++14 digits separator, we make a + copy so that we don't have to handle the separator in + parse_number. */ + std::optional<std::string> no_tick; for (;; ++p) { /* This test includes !hex because 'e' is a valid hex digit @@ -2771,18 +2775,32 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name) else if (((got_e && (p[-1] == 'e' || p[-1] == 'E')) || (got_p && (p[-1] == 'p' || p[-1] == 'P'))) && (*p == '-' || *p == '+')) - /* This is the sign of the exponent, not the end of the - number. */ - continue; + { + /* This is the sign of the exponent, not the end of + the number. */ + } + else if (*p == '\'') + { + if (!no_tick.has_value ()) + no_tick.emplace (tokstart, p); + continue; + } /* We will take any letters or digits. parse_number will complain if past the radix, or if L or U are not final. 
*/ else if ((*p < '0' || *p > '9') && ((*p < 'a' || *p > 'z') && (*p < 'A' || *p > 'Z'))) break; + if (no_tick.has_value ()) + no_tick->push_back (*p); } - toktype = parse_number (par_state, tokstart, p - tokstart, - got_dot | got_e | got_p, &yylval); + if (no_tick.has_value ()) + toktype = parse_number (par_state, no_tick->c_str (), + no_tick->length (), + got_dot | got_e | got_p, &yylval); + else + toktype = parse_number (par_state, tokstart, p - tokstart, + got_dot | got_e | got_p, &yylval); if (toktype == ERROR) error (_("Invalid number \"%.*s\"."), (int) (p - tokstart), tokstart); diff --git a/gdb/cp-name-parser.y b/gdb/cp-name-parser.y index a84051a..c0138ef 100644 --- a/gdb/cp-name-parser.y +++ b/gdb/cp-name-parser.y @@ -1686,6 +1686,10 @@ yylex (YYSTYPE *lvalp, cpname_state *state) hex = 0; } + /* If the token includes the C++14 digits separator, we make a + copy so that we don't have to handle the separator in + parse_number. */ + std::optional<std::string> no_tick; for (;; ++p) { /* This test includes !hex because 'e' is a valid hex digit @@ -1703,16 +1707,31 @@ yylex (YYSTYPE *lvalp, cpname_state *state) got_dot = 1; else if (got_e && (p[-1] == 'e' || p[-1] == 'E') && (*p == '-' || *p == '+')) - /* This is the sign of the exponent, not the end of the - number. */ - continue; + { + /* This is the sign of the exponent, not the end of + the number. */ + } + /* C++14 allows a separator. */ + else if (*p == '\'') + { + if (!no_tick.has_value ()) + no_tick.emplace (tokstart, p); + continue; + } /* We will take any letters or digits. parse_number will complain if past the radix, or if L or U are not final. */ else if (! 
ISALNUM (*p)) break; + if (no_tick.has_value ()) + no_tick->push_back (*p); } - toktype = state->parse_number (tokstart, p - tokstart, got_dot|got_e, - lvalp); + if (no_tick.has_value ()) + toktype = state->parse_number (no_tick->c_str (), + no_tick->length (), + got_dot|got_e, lvalp); + else + toktype = state->parse_number (tokstart, p - tokstart, + got_dot|got_e, lvalp); if (toktype == ERROR) { yyerror (state, _("invalid number")); @@ -2041,6 +2060,8 @@ canonicalize_tests () should_be_the_same ("x::y::z<0b111>", "x::y::z<7>"); should_be_the_same ("x::y::z<0b111>", "x::y::z<0t7>"); should_be_the_same ("x::y::z<0b111>", "x::y::z<0D7>"); + + should_be_the_same ("x::y::z<0xff'ff>", "x::y::z<65535>"); } #endif diff --git a/gdb/testsuite/gdb.cp/misc.exp b/gdb/testsuite/gdb.cp/misc.exp index 264294f..bcb20f8 100644 --- a/gdb/testsuite/gdb.cp/misc.exp +++ b/gdb/testsuite/gdb.cp/misc.exp @@ -114,3 +114,7 @@ gdb_test "print *(number_ref + v_bool_array)" "\\$\[0-9\]* = false" \ "integer reference addition with pointer" gdb_test "print *(v_bool_array - number_ref)" "\\$\[0-9\]* = false" \ "pointer subtraction with integer reference" + +# C++14 digit separator. +gdb_test "print 23'23" " = 2323" +gdb_test "print 2'3.5" " = 23.5" -- cgit v1.1