Add --unicode option to control how unicode characters are handled by display tools.

* nm.c: Add --unicode option to control how unicode characters are handled. * objdump.c: Likewise. * readelf.c: Likewise. * strings.c: Likewise. * binutils.texi: Document the new feature. * NEWS: Document the new feature. * testsuite/binutils-all/unicode.exp: New file. * testsuite/binutils-all/nm.hex.unicode * testsuite/binutils-all/strings.escape.unicode * testsuite/binutils-all/objdump.highlight.unicode * testsuite/binutils-all/readelf.invalid.unicode
author: Nick Clifton <nickc@redhat.com> 2021-11-09 13:25:42 +0000
committer: Nick Clifton <nickc@redhat.com> 2021-11-09 13:25:42 +0000
commit: b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5 (patch)
tree: 843d2678476437a35e31a6fef3ff905781fc8d8b /binutils/doc
parent: 024120b6ee344843b7d02e83e6fc0d5b33a3cd29 (diff)
download: gdb-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.zip
gdb-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.tar.gz
gdb-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.tar.bz2
1 files changed, 78 insertions, 0 deletions
diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi
index 504c3ea..5de0631 100644
--- a/binutils/doc/binutils.texi
+++ b/binutils/doc/binutils.texi
@@ -812,6 +812,7 @@ nm [@option{-A}|@option{-o}|@option{--print-file-name}]
    [@option{-s}|@option{--print-armap}]
    [@option{-t} @var{radix}|@option{--radix=}@var{radix}]
    [@option{-u}|@option{--undefined-only}]
+   [@option{-U} @var{method}] [@option{--unicode=}@var{method}]
    [@option{-V}|@option{--version}]
    [@option{-X 32_64}]
    [@option{--defined-only}]
@@ -1132,6 +1133,21 @@ Use @var{radix} as the radix for printing the symbol values.  It must be
 @cindex undefined symbols
 Display only undefined symbols (those external to each object file).
 
+@item -U @var{[d|i|l|e|x|h]}
+@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]}
+Controls the display of UTF-8 encoded multibyte characters in strings.
+The default (@option{--unicode=default}) is to give them no special
+treatment.  The @option{--unicode=locale} option displays the sequence
+in the current locale, which may or may not support them.  The options
+@option{--unicode=hex} and @option{--unicode=invalid} display them as
+hex byte sequences enclosed by angle brackets or curly braces
+respectively.
+
+The @option{--unicode=escape} option displays them as escape sequences
+(@var{\uxxxx}) and the @option{--unicode=highlight} option displays
+them as escape sequences highlighted in red (if supported by the
+output device).  The colouring is intended to draw attention to the
+presence of unicode sequences where they might not be expected.
+
 @item -V
 @itemx --version
 Show the version number of @command{nm} and exit.
@@ -2247,6 +2263,7 @@ objdump [@option{-a}|@option{--archive-headers}]
         [@option{--prefix-strip=}@var{level}]
         [@option{--insn-width=}@var{width}]
         [@option{--visualize-jumps[=color|=extended-color|=off]}
+        [@option{-U} @var{method}] [@option{--unicode=}@var{method}]
         [@option{-V}|@option{--version}]
         [@option{-H}|@option{--help}]
         @var{objfile}@dots{}
@@ -2921,6 +2938,21 @@ When displaying symbols include those which the target considers to be
 special in some way and which would not normally be of interest to the
 user.
 
+@item -U @var{[d|i|l|e|x|h]}
+@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]}
+Controls the display of UTF-8 encoded multibyte characters in strings.
+The default (@option{--unicode=default}) is to give them no special
+treatment.  The @option{--unicode=locale} option displays the sequence
+in the current locale, which may or may not support them.  The options
+@option{--unicode=hex} and @option{--unicode=invalid} display them as
+hex byte sequences enclosed by angle brackets or curly braces
+respectively.
+
+The @option{--unicode=escape} option displays them as escape sequences
+(@var{\uxxxx}) and the @option{--unicode=highlight} option displays
+them as escape sequences highlighted in red (if supported by the
+output device).  The colouring is intended to draw attention to the
+presence of unicode sequences where they might not be expected.
+
 @item -V
 @itemx --version
 Print the version number of @command{objdump} and exit.
@@ -3197,6 +3229,7 @@ strings [@option{-afovV}] [@option{-}@var{min-len}]
         [@option{-n} @var{min-len}] [@option{--bytes=}@var{min-len}]
         [@option{-t} @var{radix}] [@option{--radix=}@var{radix}]
         [@option{-e} @var{encoding}] [@option{--encoding=}@var{encoding}]
+        [@option{-U} @var{method}] [@option{--unicode=}@var{method}]
         [@option{-}] [@option{--all}] [@option{--print-file-name}]
         [@option{-T} @var{bfdname}] [@option{--target=}@var{bfdname}]
         [@option{-w}] [@option{--include-all-whitespace}]
@@ -3288,6 +3321,28 @@ single-8-bit-byte characters, @samp{b} = 16-bit bigendian, @samp{l} =
 littleendian.  Useful for finding wide character strings. (@samp{l}
 and @samp{b} apply to, for example, Unicode UTF-16/UCS-2 encodings).
 
+@item -U @var{[d|i|l|e|x|h]}
+@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]}
+Controls the display of UTF-8 encoded multibyte characters in strings.
+The default (@option{--unicode=default}) is to give them no special
+treatment, and instead rely upon the setting of the
+@option{--encoding} option.  The other values for this option
+automatically enable @option{--encoding=S}.
+
+The @option{--unicode=invalid} option treats them as non-graphic
+characters and hence not part of a valid string.  All the remaining
+options treat them as valid string characters.
+
+The @option{--unicode=locale} option displays them in the current
+locale, which may or may not support UTF-8 encoding.  The
+@option{--unicode=hex} option displays them as hex byte sequences
+enclosed between @var{<>} characters.  The @option{--unicode=escape}
+option displays them as escape sequences (@var{\uxxxx}) and the
+@option{--unicode=highlight} option displays them as escape sequences
+highlighted in red (if supported by the output device).  The colouring
+is intended to draw attention to the presence of unicode sequences
+where they might not be expected.
+
 @item -T @var{bfdname}
 @itemx --target=@var{bfdname}
 @cindex object code format
@@ -4796,6 +4851,7 @@ readelf [@option{-a}|@option{--all}]
         [@option{--demangle@var{=style}}|@option{--no-demangle}]
         [@option{--quiet}]
         [@option{--recurse-limit}|@option{--no-recurse-limit}]
+        [@option{-U} @var{method}|@option{--unicode=}@var{method}]
         [@option{-n}|@option{--notes}]
         [@option{-r}|@option{--relocs}]
         [@option{-u}|@option{--unwind}]
@@ -4962,6 +5018,28 @@ necessary in order to demangle truly complicated names.  Note however
 that if the recursion limit is disabled then stack exhaustion is
 possible and any bug reports about such an event will be rejected.
 
+@item -U @var{[d|i|l|e|x|h]}
+@itemx --unicode=[default|invalid|locale|escape|hex|highlight]
+Controls the display of non-ASCII characters in identifier names.
+The default (@option{--unicode=locale} or @option{--unicode=default}) is
+to treat them as multibyte characters and display them in the current
+locale.  All other versions of this option treat the bytes as UTF-8
+encoded values and attempt to interpret them.  If they cannot be
+interpreted or if the @option{--unicode=invalid} option is used then
+they are displayed as a sequence of hex bytes, enclosed in curly
+braces.
+
+Using the @option{--unicode=escape} option will display the characters
+as unicode escape sequences (@var{\uxxxx}).  Using the
+@option{--unicode=hex} will display the characters as hex byte
+sequences enclosed between angle brackets.
+
+Using the @option{--unicode=highlight} option will display the characters as
+unicode escape sequences but it will also highlight them in red,
+assuming that colouring is supported by the output device.  The
+colouring is intended to draw attention to the presence of unicode
+sequences when they might not be expected.
+
 @item -e
 @itemx --headers
 Display all the headers in the file.  Equivalent to @option{-h -l -S}.
author	Nick Clifton <nickc@redhat.com>	2021-11-09 13:25:42 +0000
committer	Nick Clifton <nickc@redhat.com>	2021-11-09 13:25:42 +0000
commit	b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5 (patch)
tree	843d2678476437a35e31a6fef3ff905781fc8d8b /binutils/doc
parent	024120b6ee344843b7d02e83e6fc0d5b33a3cd29 (diff)
download	gdb-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.zip gdb-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.tar.gz gdb-b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5.tar.bz2