diff options
author | Tom Tromey <tom@tromey.com> | 2025-02-22 14:07:57 -0700 |
---|---|---|
committer | Tom Tromey <tom@tromey.com> | 2025-09-10 16:05:27 -0600 |
commit | cfe3a766e64663091be4a791394269bf353d6d27 (patch) | |
tree | e153c69827a7cdc2da0b5e0dc66eb931a15a88fa /gdb | |
parent | 27de7d7bc3c16137f03a5be2d5da2b7687200eff (diff) | |
download | binutils-cfe3a766e64663091be4a791394269bf353d6d27.zip binutils-cfe3a766e64663091be4a791394269bf353d6d27.tar.gz binutils-cfe3a766e64663091be4a791394269bf353d6d27.tar.bz2 |
Change ada_decode to preserve upper-case in some situations
This patch is needed to avoid regressions later in the series.
The issue here is that ada_decode, when called with wide=false, would
act as though the input needed verbatim quoting. That would happen
because the 'W' character would be passed through; and then a later
loop would reject the result due to that character.
Similarly, with operators=false the upper-case-checking loop would be
skipped, but then some names that did need verbatim quoting would pass
through.
Furthermore, I noticed that there isn't a need to distinguish between
the "wide" and "operators" cases -- all callers pass identical values
to both.
This patch cleans up the above, consolidating the parameters and
changing how upper-case detection is handled, so that both the
operator and wide cases pass through without issue. I've added new
unit tests for this.
Acked-By: Simon Marchi <simon.marchi@efficios.com>
Diffstat (limited to 'gdb')
-rw-r--r-- | gdb/ada-lang.c | 84 | ||||
-rw-r--r-- | gdb/ada-lang.h | 15 | ||||
-rw-r--r-- | gdb/dwarf2/cooked-index-shard.c | 2 | ||||
-rw-r--r-- | gdb/symtab.h | 2 |
4 files changed, 69 insertions, 34 deletions
diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c index b403c5a..c004ecb 100644 --- a/gdb/ada-lang.c +++ b/gdb/ada-lang.c @@ -1310,7 +1310,7 @@ convert_from_hex_encoded (std::string &out, const char *str, int n) /* See ada-lang.h. */ std::string -ada_decode (const char *encoded, bool wrap, bool operators, bool wide) +ada_decode (const char *encoded, bool wrap, bool translate) { int i; int len0; @@ -1405,7 +1405,7 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide) while (i < len0) { /* Is this a symbol function? */ - if (operators && at_start_name && encoded[i] == 'O') + if (at_start_name && encoded[i] == 'O') { int k; @@ -1416,7 +1416,10 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide) op_len - 1) == 0) && !c_isalnum (encoded[i + op_len])) { - decoded.append (ada_opname_table[k].decoded); + if (translate) + decoded.append (ada_opname_table[k].decoded); + else + decoded.append (ada_opname_table[k].encoded); at_start_name = 0; i += op_len; break; @@ -1504,28 +1507,60 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide) i++; } - if (wide && i < len0 + 3 && encoded[i] == 'U' && c_isxdigit (encoded[i + 1])) + /* Handle wide characters while respecting the arguments to the + function: we may want to copy them verbatim, but in this case + we do not want to register that we've copied an upper-case + character. 
*/ + if (i < len0 + 3 && encoded[i] == 'U' && c_isxdigit (encoded[i + 1])) { - if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2)) + if (translate) { - i += 3; + if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2)) + { + i += 3; + continue; + } + } + else + { + decoded.push_back (encoded[i]); + ++i; continue; } } - else if (wide && i < len0 + 5 && encoded[i] == 'W' && c_isxdigit (encoded[i + 1])) + else if (i < len0 + 5 && encoded[i] == 'W' + && c_isxdigit (encoded[i + 1])) { - if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4)) + if (translate) + { + if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4)) + { + i += 5; + continue; + } + } + else { - i += 5; + decoded.push_back (encoded[i]); + ++i; continue; } } - else if (wide && i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W' + else if (i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W' && c_isxdigit (encoded[i + 2])) { - if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8)) + if (translate) { - i += 10; + if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8)) + { + i += 10; + continue; + } + } + else + { + decoded.push_back (encoded[i]); + ++i; continue; } } @@ -1552,6 +1587,12 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide) at_start_name = 1; i += 2; } + else if (isupper (encoded[i]) || encoded[i] == ' ') + { + /* Decoded names should never contain any uppercase + character. */ + goto Suppress; + } else { /* It's a character part of the decoded name, so just copy it @@ -1561,16 +1602,6 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide) } } - /* Decoded names should never contain any uppercase character. - Double-check this, and abort the decoding if we find one. */ - - if (operators) - { - for (i = 0; i < decoded.length(); ++i) - if (c_isupper (decoded[i]) || decoded[i] == ' ') - goto Suppress; - } - /* If the compiler added a suffix, append it now. 
*/ if (suffix >= 0) decoded = decoded + "[" + &encoded[suffix] + "]"; @@ -1596,6 +1627,13 @@ ada_decode_tests () /* This isn't valid, but used to cause a crash. PR gdb/30639. The result does not really matter very much. */ SELF_CHECK (ada_decode ("44") == "44"); + + /* Check that the settings used by the DWARF reader have the desired + effect. */ + SELF_CHECK (ada_decode ("symada__cS", false, false) == ""); + SELF_CHECK (ada_decode ("pkg__Oxor", false, false) == "pkg.Oxor"); + SELF_CHECK (ada_decode ("pack__func_W017b", false, false) + == "pack.func_W017b"); } #endif @@ -13313,7 +13351,7 @@ ada_lookup_name_info::ada_lookup_name_info (const lookup_name_info &lookup_name) else m_standard_p = false; - m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false, false); + m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false); /* If the name contains a ".", then the user is entering a fully qualified entity name, and the match must not be done in wild diff --git a/gdb/ada-lang.h b/gdb/ada-lang.h index c125fbc..5e08c3b 100644 --- a/gdb/ada-lang.h +++ b/gdb/ada-lang.h @@ -218,16 +218,13 @@ extern const char *ada_decode_symbol (const struct general_symbol_info *); simply wrapped in <...>. If WRAP is false, then the empty string will be returned. - When OPERATORS is false, operator names will not be decoded. By - default, they are decoded, e.g., 'Oadd' will be transformed to - '"+"'. - - When WIDE is false, wide characters will be left as-is. By - default, they converted from their hex encoding to the host - charset. */ + TRANSLATE has two effects. When true (the default), operator names + and wide characters will be decoded. E.g., 'Oadd' will be + transformed to '"+"', and wide characters converted from their hex + encoding to the host charset. When false, these will be left + alone. 
*/ extern std::string ada_decode (const char *name, bool wrap = true, - bool operators = true, - bool wide = true); + bool translate = true); extern std::vector<struct block_symbol> ada_lookup_symbol_list (const char *, const struct block *, domain_search_flags); diff --git a/gdb/dwarf2/cooked-index-shard.c b/gdb/dwarf2/cooked-index-shard.c index c717bbb..8359540 100644 --- a/gdb/dwarf2/cooked-index-shard.c +++ b/gdb/dwarf2/cooked-index-shard.c @@ -108,7 +108,7 @@ cooked_index_shard::handle_gnat_encoded_entry characters are left as-is. This is done to make name matching a bit simpler; and for wide characters, it means the choice of Ada source charset does not affect the indexer directly. */ - std::string canonical = ada_decode (entry->name, false, false, false); + std::string canonical = ada_decode (entry->name, false, false); if (canonical.empty ()) { entry->canonical = entry->name; diff --git a/gdb/symtab.h b/gdb/symtab.h index 25f2aaf..09a361d 100644 --- a/gdb/symtab.h +++ b/gdb/symtab.h @@ -145,7 +145,7 @@ private: std::string m_encoded_name; /* The decoded lookup name. This is formed by calling ada_decode - with both 'operators' and 'wide' set to false. */ + with 'translate' set to false. */ std::string m_decoded_name; /* Whether the user-provided lookup name was Ada encoded. If so, |