aboutsummaryrefslogtreecommitdiff
path: root/binutils/readelf.c
diff options
context:
space:
mode:
author: Nick Clifton <nickc@redhat.com> 2021-11-09 13:25:42 +0000
committer: Nick Clifton <nickc@redhat.com> 2021-11-09 13:25:42 +0000
commit: b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5 (patch)
tree: 843d2678476437a35e31a6fef3ff905781fc8d8b /binutils/readelf.c
parent: 024120b6ee344843b7d02e83e6fc0d5b33a3cd29 (diff)
downloadbinutils-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.zip
binutils-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.tar.gz
binutils-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.tar.bz2
Add --unicode option to control how unicode characters are handled by display tools.
* nm.c: Add --unicode option to control how unicode characters are handled.
* objdump.c: Likewise.
* readelf.c: Likewise.
* strings.c: Likewise.
* binutils.texi: Document the new feature.
* NEWS: Document the new feature.
* testsuite/binutils-all/unicode.exp: New file.
* testsuite/binutils-all/nm.hex.unicode
* testsuite/binutils-all/strings.escape.unicode
* testsuite/binutils-all/objdump.highlight.unicode
* testsuite/binutils-all/readelf.invalid.unicode
Diffstat (limited to 'binutils/readelf.c')
-rw-r--r-- binutils/readelf.c 190
1 files changed, 181 insertions, 9 deletions
diff --git a/binutils/readelf.c b/binutils/readelf.c
index cf3168f..50129f4 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -329,6 +329,19 @@ typedef enum print_mode
}
print_mode;
+typedef enum unicode_display_type /* How print_symbol shows bytes that are neither printable, control, nor DEL; chosen with -U/--unicode.  */
+{
+ unicode_default = 0, /* Initial value, also "-U d"; print_symbol skips the escape/hex conversion for it.  */
+ unicode_locale, /* "-U l"; like unicode_default, excluded from the escape/hex conversion.  */
+ unicode_escape, /* "-U e"; valid UTF-8 sequences are printed as \u escapes.  */
+ unicode_hex, /* "-U x"; sequences are printed as hex bytes, "<0x..>" when valid UTF-8 and "{0x..}" otherwise.  */
+ unicode_highlight, /* "-U h"; \u escapes as for unicode_escape, wrapped in colour codes when isatty (1).  */
+ unicode_invalid /* "-U i"; every sequence is treated as invalid and printed as "{0x..}".  */
+} unicode_display_type;
+
+static unicode_display_type unicode_display = unicode_default; /* Current mode; assigned by the 'U' case in parse_args.  */
+
+
/* Versioned symbol info. */
enum versioned_symbol_info
{
@@ -656,11 +669,18 @@ print_symbol (signed int width, const char * symbol)
if (c == 0)
break;
- /* Do not print control characters directly as they can affect terminal
- settings. Such characters usually appear in the names generated
- by the assembler for local labels. */
- if (ISCNTRL (c))
+ if (ISPRINT (c))
+ {
+ putchar (c);
+ width_remaining --;
+ num_printed ++;
+ }
+ else if (ISCNTRL (c))
{
+ /* Do not print control characters directly as they can affect terminal
+ settings. Such characters usually appear in the names generated
+ by the assembler for local labels. */
+
if (width_remaining < 2)
break;
@@ -668,11 +688,137 @@ print_symbol (signed int width, const char * symbol)
width_remaining -= 2;
num_printed += 2;
}
- else if (ISPRINT (c))
+ else if (c == 0x7f)
{
- putchar (c);
- width_remaining --;
- num_printed ++;
+ if (width_remaining < 5)
+ break;
+ printf ("<DEL>");
+ width_remaining -= 5;
+ num_printed += 5;
+ }
+ else if (unicode_display != unicode_locale
+ && unicode_display != unicode_default)
+ {
+ /* Display unicode characters as something else. */
+ unsigned char bytes[4];
+ bool is_utf8;
+ uint nbytes;
+
+ bytes[0] = c;
+
+ if (bytes[0] < 0xc0)
+ {
+ nbytes = 1;
+ is_utf8 = false;
+ }
+ else
+ {
+ bytes[1] = *symbol++;
+
+ if ((bytes[1] & 0xc0) != 0x80)
+ {
+ is_utf8 = false;
+ /* Do not consume this character. It may only
+ be the first byte in the sequence that was
+ corrupt. */
+ --symbol;
+ nbytes = 1;
+ }
+ else if ((bytes[0] & 0x20) == 0)
+ {
+ is_utf8 = true;
+ nbytes = 2;
+ }
+ else
+ {
+ bytes[2] = *symbol++;
+
+ if ((bytes[2] & 0xc0) != 0x80)
+ {
+ is_utf8 = false;
+ symbol -= 2;
+ nbytes = 1;
+ }
+ else if ((bytes[0] & 0x10) == 0)
+ {
+ is_utf8 = true;
+ nbytes = 3;
+ }
+ else
+ {
+ bytes[3] = *symbol++;
+
+ nbytes = 4;
+
+ if ((bytes[3] & 0xc0) != 0x80)
+ {
+ is_utf8 = false;
+ symbol -= 3;
+ nbytes = 1;
+ }
+ else
+ is_utf8 = true;
+ }
+ }
+ }
+
+ if (unicode_display == unicode_invalid)
+ is_utf8 = false;
+
+ if (unicode_display == unicode_hex || ! is_utf8)
+ {
+ uint i;
+
+ if (width_remaining < (nbytes * 2) + 2)
+ break;
+
+ putchar (is_utf8 ? '<' : '{');
+ printf ("0x");
+ for (i = 0; i < nbytes; i++)
+ printf ("%02x", bytes[i]);
+ putchar (is_utf8 ? '>' : '}');
+ }
+ else
+ {
+ if (unicode_display == unicode_highlight && isatty (1))
+ printf ("\x1B[31;47m"); /* Red. */
+
+ switch (nbytes)
+ {
+ case 2:
+ if (width_remaining < 6)
+ break;
+ printf ("\\u%02x%02x",
+ (bytes[0] & 0x1c) >> 2,
+ ((bytes[0] & 0x03) << 6) | (bytes[1] & 0x3f));
+ break;
+ case 3:
+ if (width_remaining < 6)
+ break;
+ printf ("\\u%02x%02x",
+ ((bytes[0] & 0x0f) << 4) | ((bytes[1] & 0x3c) >> 2),
+ ((bytes[1] & 0x03) << 6) | (bytes[2] & 0x3f));
+ break;
+ case 4:
+ if (width_remaining < 8)
+ break;
+ printf ("\\u%02x%02x%02x",
+ ((bytes[0] & 0x07) << 6) | ((bytes[1] & 0x3c) >> 2),
+ ((bytes[1] & 0x03) << 6) | ((bytes[2] & 0x3c) >> 2),
+ ((bytes[2] & 0x03) << 6) | (bytes[3] & 0x3f));
+
+ break;
+ default:
+ /* URG. */
+ break;
+ }
+
+ if (unicode_display == unicode_highlight && isatty (1))
+ printf ("\033[0m"); /* Default colour. */
+ }
+
+ if (bytes[nbytes - 1] == 0)
+ break;
}
else
{
@@ -4731,6 +4877,7 @@ static struct option options[] =
{"syms", no_argument, 0, 's'},
{"silent-truncation",no_argument, 0, 'T'},
{"section-details", no_argument, 0, 't'},
+ {"unicode", required_argument, NULL, 'U'},
{"unwind", no_argument, 0, 'u'},
{"version-info", no_argument, 0, 'V'},
{"version", no_argument, 0, 'v'},
@@ -4807,6 +4954,12 @@ usage (FILE * stream)
fprintf (stream, _("\
--no-recurse-limit Disable a demangling recursion limit\n"));
fprintf (stream, _("\
+ -U[dlexhi] --unicode=[default|locale|escape|hex|highlight|invalid]\n\
+ Display unicode characters as determined by the current locale\n\
+ (default), escape sequences, \"<hex sequences>\", highlighted\n\
+ escape sequences, or treat them as invalid and display as\n\
+ \"{hex sequences}\"\n"));
+ fprintf (stream, _("\
-n --notes Display the core notes (if present)\n"));
fprintf (stream, _("\
-r --relocs Display the relocations (if present)\n"));
@@ -4990,7 +5143,7 @@ parse_args (struct dump_data *dumpdata, int argc, char ** argv)
usage (stderr);
while ((c = getopt_long
- (argc, argv, "ACDHILNPR:STVWacdeghi:lnp:rstuvw::x:z", options, NULL)) != EOF)
+ (argc, argv, "ACDHILNPR:STU:VWacdeghi:lnp:rstuvw::x:z", options, NULL)) != EOF)
{
switch (c)
{
@@ -5192,6 +5345,25 @@ parse_args (struct dump_data *dumpdata, int argc, char ** argv)
/* Ignored for backward compatibility. */
break;
+ case 'U':
+ if (optarg == NULL)
+ error (_("Missing arg to -U/--unicode")); /* Can this happen ? */
+ else if (streq (optarg, "default") || streq (optarg, "d"))
+ unicode_display = unicode_default;
+ else if (streq (optarg, "locale") || streq (optarg, "l"))
+ unicode_display = unicode_locale;
+ else if (streq (optarg, "escape") || streq (optarg, "e"))
+ unicode_display = unicode_escape;
+ else if (streq (optarg, "invalid") || streq (optarg, "i"))
+ unicode_display = unicode_invalid;
+ else if (streq (optarg, "hex") || streq (optarg, "x"))
+ unicode_display = unicode_hex;
+ else if (streq (optarg, "highlight") || streq (optarg, "h"))
+ unicode_display = unicode_highlight;
+ else
+ error (_("invalid argument to -U/--unicode: %s"), optarg);
+ break;
+
case OPTION_SYM_BASE:
sym_base = 0;
if (optarg != NULL)