diff options
Diffstat (limited to 'gdb')
63 files changed, 4431 insertions, 1951 deletions
diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 9d5e5f4..1786f7e 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,4 +1,218 @@ 2009-03-20 Tom Tromey <tromey@redhat.com> + Julian Brown <julian@codesourcery.com> + + PR i18n/7220, PR i18n/7821, PR exp/8815, PR exp/9103, + PR i18n/9401, PR exp/9613: + * NEWS: Update + * value.h (value_typed_string): Declare. + (val_print_string): Update. + * valprint.h (print_char_chars): Update. + * valprint.c (print_char_chars): Add type argument. Update. + (val_print_string): Likewise. + * valops.c (value_typed_string): New function. + * utils.c (host_char_to_target): New function. + (parse_escape): Use host_char_to_target, host_hex_value. Update. + Remove '^' case. + (no_control_char_error): Remove. + * typeprint.c (print_type_scalar): Update. + * scm-valprint.c (scm_scmval_print): Update. + * scm-lang.h (scm_printchar, scm_printstr): Update. + * scm-lang.c (scm_printchar): Add type argument. + (scm_printstr): Likewise. + * printcmd.c (print_formatted): Update. + (print_scalar_formatted): Update. + (printf_command) <wide_string_arg, wide_char_arg>: New constants. + Handle '%lc' and '%ls'. + * parser-defs.h (struct typed_stoken): New type. + (struct stoken_vector): Likewise. + (write_exp_string_vector): Declare. + * parse.c (write_exp_string_vector): New function. + * p-valprint.c (pascal_val_print): Update. + * p-lang.h (is_pascal_string_type, pascal_printchar, + pascal_printstr): Update. + * p-lang.c (is_pascal_string_type): Remove 'char_size' argument. + Add 'char_type' argument. + (pascal_emit_char): Add type argument. + (pascal_printchar): Likewise. + (pascal_printstr): Likewise. + * objc-lang.c (objc_emit_char): Add type argument. + (objc_printchar): Likewise. + (objc_printstr): Likewise. + * macroexp.c (get_character_constant): Handle unicode characters. + Use c_parse_escape. + (get_string_literal): Handle unicode strings. Use + c_parse_escape. + * m2-valprint.c (print_unpacked_pointer): Update. 
+ (m2_print_array_contents): Update. + (m2_val_print): Update. + * m2-lang.c (m2_emit_char): Add type argument. + (m2_printchar): Likewise. + (m2_printstr): Likewise. + * language.h (struct language_defn) <la_printchar>: Add type + argument. + <la_printstr, la_emitchar>: Likewise. + (LA_PRINT_CHAR): Likewise. + (LA_PRINT_STRING): Likewise. + (LA_EMIT_CHAR): Likewise. + * language.c (unk_lang_emit_char): Add type argument. + (unk_lang_printchar): Likewise. + (unk_lang_printstr): Likewise. + * jv-valprint.c (java_val_print): Update. + * jv-lang.c (java_emit_char): Add type argument. + * f-valprint.c (f_val_print): Update. + * f-lang.c (f_emit_char): Add type argument. + (f_printchar): Likewise. + (f_printstr): Likewise. + * expprint.c (print_subexp_standard): Update. + * charset.h (target_wide_charset): Declare. + (c_target_char_has_backslash_escape, c_parse_backslash, + host_char_print_literally, host_char_to_target, + target_char_to_host, target_char_to_control_char): Remove. + (enum transliterations): New type. + (convert_between_encodings): Declare. + (HOST_ESCAPE_CHAR): New define. + (host_letter_to_control_character, host_hex_value): Declare. + (enum wchar_iterate_result): New enum. + (struct wchar_iterator): Declare. + (make_wchar_iterator, make_cleanup_wchar_iterator, wchar_iterator, + wchar_push_back): Declare. + * charset-list.h: New file. + * c-valprint.c (textual_name): New function. + (textual_element_type): Handle wide character types. + (c_val_print): Pass original type to textual_element_type. Handle + wide character types. + (c_value_print): Use textual_element_type. Pass original type of + value to val_print. + * c-lang.h (enum c_string_type): New type. + (c_printchar, c_printstr): Update. + * c-lang.c (classify_type): New function. + (print_wchar): Likewise. + (c_emit_char): Add type argument. Handle wide characters. + (c_printchar): Likewise. + (c_printstr): Add type argument. Handle wide and multibyte + character sets. 
+ (convert_ucn): New function. + (emit_numeric_character): Likewise. + (convert_octal): Likewise. + (convert_hex): Likewise. + (ADVANCE): New macro. + (convert_escape): New function. + (parse_one_string): Likewise. + (evaluate_subexp_c): Likewise. + (exp_descriptor_c): New global. + (c_language_defn): Use exp_descriptor_c. + (cplus_language_defn): Likewise. + (asm_language_defn): Likewise. + (minimal_language_defn): Likewise. + (charset_for_string_type): New function. + * c-exp.y (%union): Add 'svec' and 'tsval'. + (CHAR): New token. + (exp): Add CHAR production. + (string_exp): Rewrite. + (exp) <string_exp>: Rewrite. + (tempbuf): Now global. + (tempbuf_init): New global. + (parse_string_or_char): New function. + (yylex) <tempbuf>: Now global. + <tokptr, tempbufindex, tempbufsize, token_string, class_prefix>: + Remove. + Handle 'u', 'U', and 'L' prefixes. Call parse_string_or_char. + (c_parse_escape): New function. + * auxv.c (fprint_target_auxv): Update. + * ada-valprint.c (ada_emit_char): Add type argument. + (ada_printchar): Likewise. + (ada_print_scalar): Update. + (printstr): Add type argument. Update calls to ada_emit_char. + (ada_printstr): Add type argument. + (ada_val_print_array): Update. + (ada_val_print_1): Likewise. + * ada-lang.c (emit_char): Add type argument. + * ada-lang.h (ada_emit_char, ada_printchar, ada_printstr): Add + type arguments. + * gdb_locale.h: Include langinfo.h. + * charset.c (_initialize_charset): Set default host charset from + the locale. Don't register charsets. Add target-wide-charset + commands. Call find_charset_names. + (struct charset, struct translation): Remove. + (GDB_DEFAULT_HOST_CHARSET): Remove. + (GDB_DEFAULT_TARGET_WIDE_CHARSET): New define. + (target_wide_charset_name): New global. + (show_host_charset_name): Handle "auto". + (show_target_wide_charset_name): New function. + (host_charset_enum, target_charset_enum): Remove. + (charset_enum): New global. 
+ (all_charsets, register_charset, lookup_charset, all_translations, + register_translation, lookup_translation): Remove. + (simple_charset, ascii_print_literally, ascii_to_control): Remove. + (iso_8859_print_literally, iso_8859_to_control, + iso_8859_family_charset): Remove. + (ebcdic_print_literally, ebcdic_to_control, + ebcdic_family_charset): Remove. + (struct cached_iconv, check_iconv_cache, cached_iconv_convert, + register_iconv_charsets): Remove. + (target_wide_charset_be_name, target_wide_charset_le_name): New + globals. + (identity_either_char_to_other): Remove. + (set_be_le_names, validate): New functions. + (backslashable, backslashed, represented): Remove. + (default_c_target_char_has_backslash_escape): Remove. + (default_c_parse_backslash, iconv_convert): Remove. + (ascii_to_iso_8859_1_table, ascii_to_ebcdic_us_table, + ascii_to_ibm1047_table, iso_8859_1_to_ascii_table, + iso_8859_1_to_ebcdic_us_table, iso_8859_1_to_ibm1047_table, + ebcdic_us_to_ascii_table, ebcdic_us_to_iso_8859_1_table, + ebcdic_us_to_ibm1047_table, ibm1047_to_ascii_table, + ibm1047_to_iso_8859_1_table, ibm1047_to_ebcdic_us_table): Remove. + (table_convert_char, table_translation, simple_table_translation): + Remove. + (current_host_charset, current_target_charset, + c_target_char_has_backslash_escape_func, + c_target_char_has_backslash_escape_baton): Remove. + (c_parse_backslash_func, c_parse_backslash_baton): Remove. + (host_char_to_target_func, host_char_to_target_baton): Remove. + (target_char_to_host_func, target_char_to_host_baton): Remove. + (cached_iconv_host_to_target, cached_iconv_target_to_host): + Remove. + (lookup_charset_or_error, check_valid_host_charset): Remove. + (set_host_and_target_charsets): Remove. + (set_host_charset, set_target_charset): Remove. + (set_host_charset_sfunc, set_target_charset_sfunc): Rewrite. + (set_target_wide_charset_sfunc): New function. + (show_charset): Print target wide character set. + (host_charset, target_charset): Rewrite. 
+ (target_wide_charset): New function. + (c_target_char_has_backslash_escape): Remove. + (c_parse_backslash): Remove. + (host_letter_to_control_character): New function. + (host_char_print_literally): Remove. + (host_hex_value): New function. + (target_char_to_control_char): Remove. + (cleanup_iconv): New function. + (convert_between_encodings): New function. + (target_char_to_host): Remove. + (struct wchar_iterator): Define. + (make_wchar_iterator, make_cleanup_wchar_iterator, wchar_iterator, + wchar_push_back): New functions. + (do_cleanup_iterator): New function. + (char_ptr): New typedef. + (charsets): New global. + (add_one, find_charset_names): New functions. + (default_charset_names): New global. + (auto_host_charset_name): Likewise. + * aclocal.m4, config.in, configure: Rebuild. + * configure.ac: Call AM_LANGINFO_CODESET. + (GDB_DEFAULT_HOST_CHARSET): Default to UTF-8. + (AM_ICONV): Invoke earlier. + * acinclude.m4: Include codeset.m4. Subst LIBICONV_INCLUDE and + LIBICONV_LIBDIR. Check for libiconv in build tree. + * Makefile.in (LIBICONV_LIBDIR, LIBICONV_INCLUDE): New macros. + (INTERNAL_CFLAGS_BASE): Add LIBICONV_INCLUDE. + (INTERNAL_LDFLAGS): Add LIBICONV_LIBDIR. + * gdb_obstack.h (obstack_grow_wstr): New define. + * gdb_wchar.h: New file. + * defs.h: Include it. + +2009-03-20 Tom Tromey <tromey@redhat.com> Jan Kratochvil <jan.kratochvil@redhat.com> * dwarf2read.c (process_die): Handle DW_TAG_typedef. diff --git a/gdb/Makefile.in b/gdb/Makefile.in index 447906c..5c79036 100644 --- a/gdb/Makefile.in +++ b/gdb/Makefile.in @@ -165,6 +165,8 @@ INTL_CFLAGS = @INCINTL@ # Where is the ICONV library? This can be empty if libc has iconv. LIBICONV = @LIBICONV@ +LIBICONV_INCLUDE = @LIBICONV_INCLUDE@ +LIBICONV_LIBDIR = @LIBICONV_LIBDIR@ # Did the user give us a --with-sysroot option? 
TARGET_SYSTEM_ROOT = @TARGET_SYSTEM_ROOT@ @@ -388,7 +390,8 @@ INTERNAL_CFLAGS_BASE = \ $(CFLAGS) $(GLOBAL_CFLAGS) $(PROFILE_CFLAGS) \ $(GDB_CFLAGS) $(OPCODES_CFLAGS) $(READLINE_CFLAGS) \ $(BFD_CFLAGS) $(INCLUDE_CFLAGS) $(LIBDECNUMBER_CFLAGS) \ - $(INTL_CFLAGS) $(INCGNU) $(ENABLE_CFLAGS) $(INTERNAL_CPPFLAGS) + $(INTL_CFLAGS) $(INCGNU) $(ENABLE_CFLAGS) $(INTERNAL_CPPFLAGS) \ + $(LIBICONV_INCLUDE) INTERNAL_WARN_CFLAGS = $(INTERNAL_CFLAGS_BASE) $(GDB_WARN_CFLAGS) INTERNAL_CFLAGS = $(INTERNAL_WARN_CFLAGS) $(GDB_WERROR_CFLAGS) @@ -400,7 +403,7 @@ LDFLAGS = @LDFLAGS@ # I think it's perfectly reasonable for a user to set -pg in CFLAGS # and have it work; that's why CFLAGS is here. # PROFILE_CFLAGS is _not_ included, however, because we use monstartup. -INTERNAL_LDFLAGS = $(CFLAGS) $(GLOBAL_CFLAGS) $(MH_LDFLAGS) $(LDFLAGS) $(CONFIG_LDFLAGS) +INTERNAL_LDFLAGS = $(CFLAGS) $(GLOBAL_CFLAGS) $(MH_LDFLAGS) $(LDFLAGS) $(CONFIG_LDFLAGS) $(LIBICONV_LIBDIR) # If your system is missing alloca(), or, more likely, it's there but # it doesn't work, then refer to libiberty. @@ -3,6 +3,15 @@ *** Changes since GDB 6.8 +* GDB now has support for multi-byte and wide character sets on the +target. Strings whose character type is wchar_t, char16_t, or +char32_t are now correctly printed. GDB supports wide- and unicode- +literals in C, that is, L'x', L"string", u'x', u"string", U'x', and +U"string" syntax. And, GDB allows the "%ls" and "%lc" formats in +`printf'. This feature requires iconv to work properly; if your +system does not have a working iconv, GDB can use GNU libiconv. See +the installation instructions for more information. + * GDB now supports automatic retrieval of shared library files from remote targets. To use this feature, specify a system root that begins with the `remote:' prefix, either via the `set sysroot' command or via @@ -182,6 +191,11 @@ set target-async with GDB while the target is running. 
"show target-async" displays the current state of asynchronous execution of the target. +set target-wide-charset +show target-wide-charset + The target-wide-charset is the name of the character set that GDB + uses when printing characters whose type is wchar_t. + set tcp auto-retry (on|off) show tcp auto-retry set tcp connect-timeout diff --git a/gdb/acinclude.m4 b/gdb/acinclude.m4 index 81b5d47..a2c262e 100644 --- a/gdb/acinclude.m4 +++ b/gdb/acinclude.m4 @@ -29,6 +29,9 @@ sinclude([../config/depstand.m4]) dnl For AM_LC_MESSAGES sinclude([../config/lcmessage.m4]) +dnl For AM_LANGINFO_CODESET. +sinclude([../config/codeset.m4]) + # # Sometimes the native compiler is a bogus stub for gcc or /usr/ucb/cc. This # makes configure think it's cross compiling. If --target wasn't used, then @@ -174,14 +177,18 @@ AC_DEFUN([AM_ICONV], AC_ARG_WITH([libiconv-prefix], [ --with-libiconv-prefix=DIR search for libiconv in DIR/include and DIR/lib], [ for dir in `echo "$withval" | tr : ' '`; do - if test -d $dir/include; then CPPFLAGS="$CPPFLAGS -I$dir/include"; fi - if test -d $dir/lib; then LDFLAGS="$LDFLAGS -L$dir/lib"; fi + if test -d $dir/include; then LIBICONV_INCLUDE="-I$dir/include"; CPPFLAGS="$CPPFLAGS -I$dir/include"; fi + if test -d $dir/lib; then LIBICONV_LIBDIR="-L$dir/lib"; LDFLAGS="$LDFLAGS -L$dir/lib"; fi done ]) + BUILD_LIBICONV_LIBDIR="-L../libiconv/lib/.libs -L../libiconv/lib/_libs" + BUILD_LIBICONV_INCLUDE="-I../libiconv/include" + AC_CACHE_CHECK(for iconv, am_cv_func_iconv, [ am_cv_func_iconv="no, consider installing GNU libiconv" am_cv_lib_iconv=no + am_cv_use_build_libiconv=no AC_TRY_LINK([#include <stdlib.h> #include <iconv.h>], [iconv_t cd = iconv_open("",""); @@ -200,6 +207,26 @@ AC_DEFUN([AM_ICONV], am_cv_func_iconv=yes) LIBS="$am_save_LIBS" fi + # Look for libiconv in the build tree. 
+ if test "$am_cv_func_iconv" != yes && test -d ../libiconv; then + am_save_LIBS="$LIBS" + am_save_CPPFLAGS="$CPPFLAGS" + LIBS="$LIBS $BUILD_LIBICONV_LIBDIR -liconv" + CPPFLAGS="$CPPFLAGS $BUILD_LIBICONV_INCLUDE" + AC_TRY_LINK([#include <stdlib.h> +#include <iconv.h>], + [iconv_t cd = iconv_open("",""); + iconv(cd,NULL,NULL,NULL,NULL); + iconv_close(cd);], + am_cv_lib_iconv=yes + am_cv_func_iconv=yes) + LIBS="$am_save_LIBS" + if test "$am_cv_func_iconv" = "yes"; then + am_cv_use_build_libiconv=yes + else + CPPFLAGS="$am_save_CPPFLAGS" + fi + fi ]) if test "$am_cv_func_iconv" = yes; then AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.]) @@ -229,7 +256,13 @@ size_t iconv(); if test "$am_cv_lib_iconv" = yes; then LIBICONV="-liconv" fi + if test "$am_cv_use_build_libiconv" = yes; then + LIBICONV_LIBDIR="$BUILD_LIBICONV_LIBDIR" + LIBICONV_INCLUDE="$BUILD_LIBICONV_INCLUDE" + fi AC_SUBST(LIBICONV) + AC_SUBST(LIBICONV_INCLUDE) + AC_SUBST(LIBICONV_LIBDIR) ]) dnl written by Guido Draheim <guidod@gmx.de>, original by Alexandre Oliva diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c index e4fdd24..0800454 100644 --- a/gdb/ada-lang.c +++ b/gdb/ada-lang.c @@ -11022,9 +11022,9 @@ ada_language_arch_info (struct gdbarch *gdbarch, /* Not really used, but needed in the ada_language_defn. 
*/ static void -emit_char (int c, struct ui_file *stream, int quoter) +emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { - ada_emit_char (c, stream, quoter, 1); + ada_emit_char (c, type, stream, quoter, 1); } static int diff --git a/gdb/ada-lang.h b/gdb/ada-lang.h index 50f90fb..88b6c16 100644 --- a/gdb/ada-lang.h +++ b/gdb/ada-lang.h @@ -255,12 +255,12 @@ extern int ada_value_print (struct value *, struct ui_file *, /* Defined in ada-lang.c */ -extern void ada_emit_char (int, struct ui_file *, int, int); +extern void ada_emit_char (int, struct type *, struct ui_file *, int, int); -extern void ada_printchar (int, struct ui_file *); +extern void ada_printchar (int, struct type *, struct ui_file *); -extern void ada_printstr (struct ui_file *, const gdb_byte *, - unsigned int, int, int, +extern void ada_printstr (struct ui_file *, struct type *, const gdb_byte *, + unsigned int, int, const struct value_print_options *); struct value *ada_convert_actual (struct value *actual, diff --git a/gdb/ada-valprint.c b/gdb/ada-valprint.c index 9647971..3da58ea 100644 --- a/gdb/ada-valprint.c +++ b/gdb/ada-valprint.c @@ -269,7 +269,8 @@ printable_val_type (struct type *type, const gdb_byte *valaddr) (1 or 2) of the character. 
*/ void -ada_emit_char (int c, struct ui_file *stream, int quoter, int type_len) +ada_emit_char (int c, struct type *type, struct ui_file *stream, + int quoter, int type_len) { if (type_len != 2) type_len = 1; @@ -366,10 +367,10 @@ ada_print_floating (const gdb_byte *valaddr, struct type *type, } void -ada_printchar (int c, struct ui_file *stream) +ada_printchar (int c, struct type *type, struct ui_file *stream) { fputs_filtered ("'", stream); - ada_emit_char (c, stream, '\'', 1); + ada_emit_char (c, type, stream, '\'', 1); fputs_filtered ("'", stream); } @@ -411,7 +412,7 @@ ada_print_scalar (struct type *type, LONGEST val, struct ui_file *stream) break; case TYPE_CODE_CHAR: - LA_PRINT_CHAR ((unsigned char) val, stream); + LA_PRINT_CHAR ((unsigned char) val, type, stream); break; case TYPE_CODE_BOOL: @@ -454,7 +455,7 @@ ada_print_scalar (struct type *type, LONGEST val, struct ui_file *stream) */ static void -printstr (struct ui_file *stream, const gdb_byte *string, +printstr (struct ui_file *stream, struct type *elttype, const gdb_byte *string, unsigned int length, int force_ellipses, int type_len, const struct value_print_options *options) { @@ -506,7 +507,7 @@ printstr (struct ui_file *stream, const gdb_byte *string, in_quotes = 0; } fputs_filtered ("'", stream); - ada_emit_char (char_at (string, i, type_len), stream, '\'', + ada_emit_char (char_at (string, i, type_len), elttype, stream, '\'', type_len); fputs_filtered ("'", stream); fprintf_filtered (stream, _(" <repeats %u times>"), reps); @@ -524,7 +525,7 @@ printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("\"", stream); in_quotes = 1; } - ada_emit_char (char_at (string, i, type_len), stream, '"', + ada_emit_char (char_at (string, i, type_len), elttype, stream, '"', type_len); things_printed += 1; } @@ -544,11 +545,12 @@ printstr (struct ui_file *stream, const gdb_byte *string, } void -ada_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int 
force_ellipses, +ada_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { - printstr (stream, string, length, force_ellipses, width, options); + printstr (stream, type, string, length, force_ellipses, TYPE_LENGTH (type), + options); } @@ -637,7 +639,7 @@ ada_val_print_array (struct type *type, const gdb_byte *valaddr, len = temp_len; } - printstr (stream, valaddr, len, 0, eltlen, options); + printstr (stream, elttype, valaddr, len, 0, eltlen, options); result = len; } else @@ -817,7 +819,7 @@ ada_val_print_1 (struct type *type, const gdb_byte *valaddr0, { fputs_filtered (" ", stream); ada_printchar ((unsigned char) unpack_long (type, valaddr), - stream); + type, stream); } } return 0; @@ -248,7 +248,8 @@ fprint_target_auxv (struct ui_file *file, struct target_ops *ops) get_user_print_options (&opts); if (opts.addressprint) fprintf_filtered (file, "0x%s", paddr_nz (val)); - val_print_string (val, -1, 1, file, &opts); + val_print_string (builtin_type (target_gdbarch)->builtin_char, + val, -1, file, &opts); fprintf_filtered (file, "\n"); } break; diff --git a/gdb/c-exp.y b/gdb/c-exp.y index 04911ec..92d6c21 100644 --- a/gdb/c-exp.y +++ b/gdb/c-exp.y @@ -141,6 +141,7 @@ void yyerror (char *); struct symbol *sym; struct type *tval; struct stoken sval; + struct typed_stoken tsval; struct ttype tsym; struct symtoken ssym; int voidval; @@ -148,6 +149,7 @@ void yyerror (char *); enum exp_opcode opcode; struct internalvar *ivar; + struct stoken_vector svec; struct type **tvec; int *ivec; } @@ -180,11 +182,13 @@ static int parse_number (char *, int, int, YYSTYPE *); Contexts where this distinction is not important can use the nonterminal "name", which matches either NAME or TYPENAME. */ -%token <sval> STRING +%token <tsval> STRING +%token <tsval> CHAR %token <ssym> NAME /* BLOCKNAME defined below to give it higher precedence. 
*/ %token <voidval> COMPLETE %token <tsym> TYPENAME -%type <sval> name string_exp +%type <sval> name +%type <svec> string_exp %type <ssym> name_not_typename %type <tsym> typename @@ -522,6 +526,15 @@ exp : INT write_exp_elt_opcode (OP_LONG); } ; +exp : CHAR + { + struct stoken_vector vec; + vec.len = 1; + vec.tokens = &$1; + write_exp_string_vector ($1.type, &vec); + } + ; + exp : NAME_OR_INT { YYSTYPE val; parse_number ($1.stoken.ptr, $1.stoken.length, 0, &val); @@ -570,48 +583,64 @@ string_exp: string. Note that we follow the NUL-termination convention of the lexer. */ - $$.length = $1.length; - $$.ptr = malloc ($1.length + 1); - memcpy ($$.ptr, $1.ptr, $1.length + 1); + struct typed_stoken *vec = XNEW (struct typed_stoken); + $$.len = 1; + $$.tokens = vec; + + vec->type = $1.type; + vec->length = $1.length; + vec->ptr = malloc ($1.length + 1); + memcpy (vec->ptr, $1.ptr, $1.length + 1); } | string_exp STRING { /* Note that we NUL-terminate here, but just for convenience. */ - struct stoken t; - t.length = $1.length + $2.length; - t.ptr = malloc (t.length + 1); - memcpy (t.ptr, $1.ptr, $1.length); - memcpy (t.ptr + $1.length, $2.ptr, $2.length + 1); - free ($1.ptr); - $$ = t; + char *p; + ++$$.len; + $$.tokens = realloc ($$.tokens, + $$.len * sizeof (struct typed_stoken)); + + p = malloc ($2.length + 1); + memcpy (p, $2.ptr, $2.length + 1); + + $$.tokens[$$.len - 1].type = $2.type; + $$.tokens[$$.len - 1].length = $2.length; + $$.tokens[$$.len - 1].ptr = p; } ; exp : string_exp - { /* C strings are converted into array constants with - an explicit null byte added at the end. Thus - the array upper bound is the string length. - There is no such thing in C as a completely empty - string. 
*/ - char *sp = $1.ptr; int count = $1.length; - while (count-- > 0) + { + int i; + enum c_string_type type = C_STRING; + + for (i = 0; i < $1.len; ++i) { - write_exp_elt_opcode (OP_LONG); - write_exp_elt_type (parse_type->builtin_char); - write_exp_elt_longcst ((LONGEST)(*sp++)); - write_exp_elt_opcode (OP_LONG); + switch ($1.tokens[i].type) + { + case C_STRING: + break; + case C_WIDE_STRING: + case C_STRING_16: + case C_STRING_32: + if (type != C_STRING + && type != $1.tokens[i].type) + error ("Undefined string concatenation."); + type = $1.tokens[i].type; + break; + default: + /* internal error */ + internal_error (__FILE__, __LINE__, + "unrecognized type in string concatenation"); + } } - write_exp_elt_opcode (OP_LONG); - write_exp_elt_type (parse_type->builtin_char); - write_exp_elt_longcst ((LONGEST)'\0'); - write_exp_elt_opcode (OP_LONG); - write_exp_elt_opcode (OP_ARRAY); - write_exp_elt_longcst ((LONGEST) 0); - write_exp_elt_longcst ((LONGEST) ($1.length)); - write_exp_elt_opcode (OP_ARRAY); - free ($1.ptr); + + write_exp_string_vector (type, &$1); + for (i = 0; i < $1.len; ++i) + free ($1.tokens[i].ptr); + free ($1.tokens); } ; @@ -1359,6 +1388,263 @@ parse_number (p, len, parsed_float, putithere) return INT; } +/* Temporary obstack used for holding strings. */ +static struct obstack tempbuf; +static int tempbuf_init; + +/* Parse a C escape sequence. The initial backslash of the sequence + is at (*PTR)[-1]. *PTR will be updated to point to just after the + last character of the sequence. If OUTPUT is not NULL, the + translated form of the escape sequence will be written there. If + OUTPUT is NULL, no output is written and the call will only affect + *PTR. If an escape sequence is expressed in target bytes, then the + entire sequence will simply be copied to OUTPUT. Return 1 if any + character was emitted, 0 otherwise. 
*/ + +int +c_parse_escape (char **ptr, struct obstack *output) +{ + char *tokptr = *ptr; + int result = 1; + + /* Some escape sequences undergo character set conversion. Those we + translate here. */ + switch (*tokptr) + { + /* Hex escapes do not undergo character set conversion, so keep + the escape sequence for later. */ + case 'x': + if (output) + obstack_grow_str (output, "\\x"); + ++tokptr; + if (!isxdigit (*tokptr)) + error (_("\\x escape without a following hex digit")); + while (isxdigit (*tokptr)) + { + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + } + break; + + /* Octal escapes do not undergo character set conversion, so + keep the escape sequence for later. */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + if (output) + obstack_grow_str (output, "\\"); + while (isdigit (*tokptr) && *tokptr != '8' && *tokptr != '9') + { + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + } + break; + + /* We handle UCNs later. We could handle them here, but that + would mean a spurious error in the case where the UCN could + be converted to the target charset but not the host + charset. */ + case 'u': + case 'U': + { + char c = *tokptr; + int i, len = c == 'U' ? 8 : 4; + if (output) + { + obstack_1grow (output, '\\'); + obstack_1grow (output, *tokptr); + } + ++tokptr; + if (!isxdigit (*tokptr)) + error (_("\\%c escape without a following hex digit"), c); + for (i = 0; i < len && isxdigit (*tokptr); ++i) + { + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + } + } + break; + + /* We must pass backslash through so that it does not + cause quoting during the second expansion. */ + case '\\': + if (output) + obstack_grow_str (output, "\\\\"); + ++tokptr; + break; + + /* Escapes which undergo conversion. 
*/ + case 'a': + if (output) + obstack_1grow (output, '\a'); + ++tokptr; + break; + case 'b': + if (output) + obstack_1grow (output, '\b'); + ++tokptr; + break; + case 'f': + if (output) + obstack_1grow (output, '\f'); + ++tokptr; + break; + case 'n': + if (output) + obstack_1grow (output, '\n'); + ++tokptr; + break; + case 'r': + if (output) + obstack_1grow (output, '\r'); + ++tokptr; + break; + case 't': + if (output) + obstack_1grow (output, '\t'); + ++tokptr; + break; + case 'v': + if (output) + obstack_1grow (output, '\v'); + ++tokptr; + break; + + /* GCC extension. */ + case 'e': + if (output) + obstack_1grow (output, HOST_ESCAPE_CHAR); + ++tokptr; + break; + + /* Backslash-newline expands to nothing at all. */ + case '\n': + ++tokptr; + result = 0; + break; + + /* A few escapes just expand to the character itself. */ + case '\'': + case '\"': + case '?': + /* GCC extensions. */ + case '(': + case '{': + case '[': + case '%': + /* Unrecognized escapes turn into the character itself. */ + default: + if (output) + obstack_1grow (output, *tokptr); + ++tokptr; + break; + } + *ptr = tokptr; + return result; +} + +/* Parse a string or character literal from TOKPTR. The string or + character may be wide or unicode. *OUTPTR is set to just after the + end of the literal in the input string. The resulting token is + stored in VALUE. This returns a token value, either STRING or + CHAR, depending on what was parsed. *HOST_CHARS is set to the + number of host characters in the literal. */ +static int +parse_string_or_char (char *tokptr, char **outptr, struct typed_stoken *value, + int *host_chars) +{ + int quote, i; + enum c_string_type type; + + /* Build the gdb internal form of the input string in tempbuf. Note + that the buffer is null byte terminated *only* for the + convenience of debugging gdb itself and printing the buffer + contents when the buffer contains no embedded nulls. 
Gdb does + not depend upon the buffer being null byte terminated, it uses + the length string instead. This allows gdb to handle C strings + (as well as strings in other languages) with embedded null + bytes */ + + if (!tempbuf_init) + tempbuf_init = 1; + else + obstack_free (&tempbuf, NULL); + obstack_init (&tempbuf); + + /* Record the string type. */ + if (*tokptr == 'L') + { + type = C_WIDE_STRING; + ++tokptr; + } + else if (*tokptr == 'u') + { + type = C_STRING_16; + ++tokptr; + } + else if (*tokptr == 'U') + { + type = C_STRING_32; + ++tokptr; + } + else + type = C_STRING; + + /* Skip the quote. */ + quote = *tokptr; + if (quote == '\'') + type |= C_CHAR; + ++tokptr; + + *host_chars = 0; + + while (*tokptr) + { + char c = *tokptr; + if (c == '\\') + { + ++tokptr; + *host_chars += c_parse_escape (&tokptr, &tempbuf); + } + else if (c == quote) + break; + else + { + obstack_1grow (&tempbuf, c); + ++tokptr; + /* FIXME: this does the wrong thing with multi-byte host + characters. We could use mbrlen here, but that would + make "set host-charset" a bit less useful. */ + ++*host_chars; + } + } + + if (*tokptr != quote) + { + if (quote == '"') + error ("Unterminated string in expression."); + else + error ("Unmatched single quote."); + } + ++tokptr; + + value->type = type; + value->ptr = obstack_base (&tempbuf); + value->length = obstack_object_size (&tempbuf); + + *outptr = tokptr; + + return quote == '"' ? STRING : CHAR; +} + struct token { char *operator; @@ -1528,12 +1814,6 @@ yylex () int namelen; unsigned int i; char *tokstart; - char *tokptr; - int tempbufindex; - static char *tempbuf; - static int tempbufsize; - char * token_string = NULL; - int class_prefix = 0; int saw_structop = last_was_structop; char *copy; @@ -1605,46 +1885,6 @@ yylex () lexptr++; goto retry; - case '\'': - /* We either have a character constant ('0' or '\177' for example) - or we have a quoted symbol reference ('foo(int,int)' in C++ - for example). 
*/ - lexptr++; - c = *lexptr++; - if (c == '\\') - c = parse_escape (&lexptr); - else if (c == '\'') - error ("Empty character constant."); - else if (! host_char_to_target (c, &c)) - { - int toklen = lexptr - tokstart + 1; - char *tok = alloca (toklen + 1); - memcpy (tok, tokstart, toklen); - tok[toklen] = '\0'; - error ("There is no character corresponding to %s in the target " - "character set `%s'.", tok, target_charset ()); - } - - yylval.typed_val_int.val = c; - yylval.typed_val_int.type = parse_type->builtin_char; - - c = *lexptr++; - if (c != '\'') - { - namelen = skip_quoted (tokstart) - tokstart; - if (namelen > 2) - { - lexptr = tokstart + namelen; - if (lexptr[-1] != '\'') - error ("Unmatched single quote."); - namelen -= 2; - tokstart++; - goto tryname; - } - error ("Invalid character constant."); - } - return INT; - case '(': paren_depth++; lexptr++; @@ -1762,70 +2002,33 @@ yylex () lexptr++; return c; + case 'L': + case 'u': + case 'U': + if (tokstart[1] != '"' && tokstart[1] != '\'') + break; + /* Fall through. */ + case '\'': case '"': - - /* Build the gdb internal form of the input string in tempbuf, - translating any standard C escape forms seen. Note that the - buffer is null byte terminated *only* for the convenience of - debugging gdb itself and printing the buffer contents when - the buffer contains no embedded nulls. Gdb does not depend - upon the buffer being null byte terminated, it uses the length - string instead. This allows gdb to handle C strings (as well - as strings in other languages) with embedded null bytes */ - - tokptr = ++tokstart; - tempbufindex = 0; - - do { - char *char_start_pos = tokptr; - - /* Grow the static temp buffer if necessary, including allocating - the first one on demand. 
*/ - if (tempbufindex + 1 >= tempbufsize) - { - tempbuf = (char *) realloc (tempbuf, tempbufsize += 64); - } - switch (*tokptr) + { + int host_len; + int result = parse_string_or_char (tokstart, &lexptr, &yylval.tsval, + &host_len); + if (result == CHAR) { - case '\0': - case '"': - /* Do nothing, loop will terminate. */ - break; - case '\\': - tokptr++; - c = parse_escape (&tokptr); - if (c == -1) + if (host_len == 0) + error ("Empty character constant."); + else if (host_len > 2 && c == '\'') { - continue; + ++tokstart; + namelen = lexptr - tokstart - 1; + goto tryname; } - tempbuf[tempbufindex++] = c; - break; - default: - c = *tokptr++; - if (! host_char_to_target (c, &c)) - { - int len = tokptr - char_start_pos; - char *copy = alloca (len + 1); - memcpy (copy, char_start_pos, len); - copy[len] = '\0'; - - error ("There is no character corresponding to `%s' " - "in the target character set `%s'.", - copy, target_charset ()); - } - tempbuf[tempbufindex++] = c; - break; + else if (host_len > 1) + error ("Invalid character constant."); } - } while ((*tokptr != '"') && (*tokptr != '\0')); - if (*tokptr++ != '"') - { - error ("Unterminated string in expression."); - } - tempbuf[tempbufindex] = '\0'; /* See note above */ - yylval.sval.ptr = tempbuf; - yylval.sval.length = tempbufindex; - lexptr = tokptr; - return (STRING); + return result; + } } if (!(c == '_' || c == '$' diff --git a/gdb/c-lang.c b/gdb/c-lang.c index 8b5410f..deab3f4 100644 --- a/gdb/c-lang.c +++ b/gdb/c-lang.c @@ -33,48 +33,304 @@ #include "demangle.h" #include "cp-abi.h" #include "cp-support.h" +#include "gdb_obstack.h" +#include <ctype.h> extern void _initialize_c_language (void); -static void c_emit_char (int c, struct ui_file * stream, int quoter); + +/* Given a C string type, STR_TYPE, return the corresponding target + character set name. 
*/ + +static const char * +charset_for_string_type (enum c_string_type str_type) +{ + switch (str_type & ~C_CHAR) + { + case C_STRING: + return target_charset (); + case C_WIDE_STRING: + return target_wide_charset (); + case C_STRING_16: + /* FIXME: UCS-2 is not always correct. */ + if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG) + return "UCS-2BE"; + else + return "UCS-2LE"; + case C_STRING_32: + /* FIXME: UCS-4 is not always correct. */ + if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG) + return "UCS-4BE"; + else + return "UCS-4LE"; + } + internal_error (__FILE__, __LINE__, "unhandled c_string_type"); +} + +/* Classify ELTTYPE according to what kind of character it is. Return + the enum constant representing the character type. Also set + *ENCODING to the name of the character set to use when converting + characters of this type to the host character set. */ + +static enum c_string_type +classify_type (struct type *elttype, const char **encoding) +{ + struct type *saved_type; + enum c_string_type result; + + /* We do one or two passes -- one on ELTTYPE, and then maybe a + second one on a typedef target. */ + do + { + char *name = TYPE_NAME (elttype); + + if (TYPE_CODE (elttype) == TYPE_CODE_CHAR || !name) + { + result = C_CHAR; + goto done; + } + + if (!strcmp (name, "wchar_t")) + { + result = C_WIDE_CHAR; + goto done; + } + + if (!strcmp (name, "char16_t")) + { + result = C_CHAR_16; + goto done; + } + + if (!strcmp (name, "char32_t")) + { + result = C_CHAR_32; + goto done; + } + + saved_type = elttype; + CHECK_TYPEDEF (elttype); + } + while (elttype != saved_type); + + /* Punt. */ + result = C_CHAR; + + done: + *encoding = charset_for_string_type (result); + return result; +} + +/* Return true if print_wchar can display W without resorting to a + numeric escape, false otherwise. 
*/ + +static int +wchar_printable (gdb_wchar_t w) +{ + return (gdb_iswprint (w) + || w == LCST ('\a') || w == LCST ('\b') + || w == LCST ('\f') || w == LCST ('\n') + || w == LCST ('\r') || w == LCST ('\t') + || w == LCST ('\v')); +} + +/* A helper function that converts the contents of STRING to wide + characters and then appends them to OUTPUT. */ + +static void +append_string_as_wide (const char *string, struct obstack *output) +{ + for (; *string; ++string) + { + gdb_wchar_t w = gdb_btowc (*string); + obstack_grow (output, &w, sizeof (gdb_wchar_t)); + } +} + +/* Print a wide character W to OUTPUT. ORIG is a pointer to the + original (target) bytes representing the character, ORIG_LEN is the + number of valid bytes. WIDTH is the number of bytes in a base + characters of the type. OUTPUT is an obstack to which wide + characters are emitted. QUOTER is a (narrow) character indicating + the style of quotes surrounding the character to be printed. + NEED_ESCAPE is an in/out flag which is used to track numeric + escapes across calls. 
*/ + +static void +print_wchar (gdb_wint_t w, const gdb_byte *orig, int orig_len, + int width, struct obstack *output, int quoter, + int *need_escapep) +{ + int need_escape = *need_escapep; + *need_escapep = 0; + if (gdb_iswprint (w) && (!need_escape || (!gdb_iswdigit (w) + && w != LCST ('8') + && w != LCST ('9')))) + { + if (w == gdb_btowc (quoter) || w == LCST ('\\')) + obstack_grow_wstr (output, LCST ("\\")); + obstack_grow (output, &w, sizeof (gdb_wchar_t)); + } + else + { + switch (w) + { + case LCST ('\a'): + obstack_grow_wstr (output, LCST ("\\a")); + break; + case LCST ('\b'): + obstack_grow_wstr (output, LCST ("\\b")); + break; + case LCST ('\f'): + obstack_grow_wstr (output, LCST ("\\f")); + break; + case LCST ('\n'): + obstack_grow_wstr (output, LCST ("\\n")); + break; + case LCST ('\r'): + obstack_grow_wstr (output, LCST ("\\r")); + break; + case LCST ('\t'): + obstack_grow_wstr (output, LCST ("\\t")); + break; + case LCST ('\v'): + obstack_grow_wstr (output, LCST ("\\v")); + break; + default: + { + int i; + + for (i = 0; i + width <= orig_len; i += width) + { + char octal[30]; + ULONGEST value = extract_unsigned_integer (&orig[i], width); + sprintf (octal, "\\%lo", (long) value); + append_string_as_wide (octal, output); + } + /* If we somehow have extra bytes, print them now. */ + while (i < orig_len) + { + char octal[5]; + sprintf (octal, "\\%.3o", orig[i] & 0xff); + append_string_as_wide (octal, output); + ++i; + } + + *need_escapep = 1; + } + break; + } + } +} /* Print the character C on STREAM as part of the contents of a literal string whose delimiter is QUOTER. Note that that format for printing characters and strings is language specific. 
*/ static void -c_emit_char (int c, struct ui_file *stream, int quoter) +c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { - const char *escape; - int host_char; + struct obstack wchar_buf, output; + struct cleanup *cleanups; + const char *encoding; + gdb_byte *buf; + struct wchar_iterator *iter; + int need_escape = 0; - c &= 0xFF; /* Avoid sign bit follies */ + classify_type (type, &encoding); - escape = c_target_char_has_backslash_escape (c); - if (escape) - { - if (quoter == '"' && strcmp (escape, "0") == 0) - /* Print nulls embedded in double quoted strings as \000 to - prevent ambiguity. */ - fprintf_filtered (stream, "\\000"); - else - fprintf_filtered (stream, "\\%s", escape); - } - else if (target_char_to_host (c, &host_char) - && host_char_print_literally (host_char)) + buf = alloca (TYPE_LENGTH (type)); + pack_long (buf, type, c); + + iter = make_wchar_iterator (buf, TYPE_LENGTH (type), encoding, + TYPE_LENGTH (type)); + cleanups = make_cleanup_wchar_iterator (iter); + + /* This holds the printable form of the wchar_t data. */ + obstack_init (&wchar_buf); + make_cleanup_obstack_free (&wchar_buf); + + while (1) { - if (host_char == '\\' || host_char == quoter) - fputs_filtered ("\\", stream); - fprintf_filtered (stream, "%c", host_char); + int num_chars; + gdb_wchar_t *chars; + const gdb_byte *buf; + size_t buflen; + int print_escape = 1; + enum wchar_iterate_result result; + + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + if (num_chars < 0) + break; + if (num_chars > 0) + { + /* If all characters are printable, print them. Otherwise, + we're going to have to print an escape sequence. We + check all characters because we want to print the target + bytes in the escape sequence, and we don't know character + boundaries there. 
*/ + int i; + + print_escape = 0; + for (i = 0; i < num_chars; ++i) + if (!wchar_printable (chars[i])) + { + print_escape = 1; + break; + } + + if (!print_escape) + { + for (i = 0; i < num_chars; ++i) + print_wchar (chars[i], buf, buflen, TYPE_LENGTH (type), + &wchar_buf, quoter, &need_escape); + } + } + + /* This handles the NUM_CHARS == 0 case as well. */ + if (print_escape) + print_wchar (gdb_WEOF, buf, buflen, TYPE_LENGTH (type), &wchar_buf, + quoter, &need_escape); } - else - fprintf_filtered (stream, "\\%.3o", (unsigned int) c); + + /* The output in the host encoding. */ + obstack_init (&output); + make_cleanup_obstack_free (&output); + + convert_between_encodings ("wchar_t", host_charset (), + obstack_base (&wchar_buf), + obstack_object_size (&wchar_buf), + 1, &output, translit_char); + obstack_1grow (&output, '\0'); + + fputs_filtered (obstack_base (&output), stream); + + do_cleanups (cleanups); } void -c_printchar (int c, struct ui_file *stream) +c_printchar (int c, struct type *type, struct ui_file *stream) { + enum c_string_type str_type; + const char *encoding; + + str_type = classify_type (type, &encoding); + switch (str_type) + { + case C_CHAR: + break; + case C_WIDE_CHAR: + fputc_filtered ('L', stream); + break; + case C_CHAR_16: + fputc_filtered ('u', stream); + break; + case C_CHAR_32: + fputc_filtered ('U', stream); + break; + } + fputc_filtered ('\'', stream); - LA_EMIT_CHAR (c, stream, '\''); + LA_EMIT_CHAR (c, type, stream, '\''); fputc_filtered ('\'', stream); } @@ -85,87 +341,208 @@ c_printchar (int c, struct ui_file *stream) printing LENGTH characters, or if FORCE_ELLIPSES. 
*/ void -c_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); + struct obstack wchar_buf, output; + struct cleanup *cleanup; + enum c_string_type str_type; + const char *encoding; + struct wchar_iterator *iter; + int finished = 0; + int need_escape = 0; /* If the string was not truncated due to `set print elements', and the last byte of it is a null, we don't print that, in traditional C style. */ if (!force_ellipses && length > 0 - && (extract_unsigned_integer (string + (length - 1) * width, width) - == '\0')) + && (extract_unsigned_integer (string + (length - 1) * width, width) == 0)) length--; + str_type = classify_type (type, &encoding) & ~C_CHAR; + switch (str_type) + { + case C_STRING: + break; + case C_WIDE_STRING: + fputs_filtered ("L", stream); + break; + case C_STRING_16: + fputs_filtered ("u", stream); + break; + case C_STRING_32: + fputs_filtered ("U", stream); + break; + } + if (length == 0) { fputs_filtered ("\"\"", stream); return; } - for (i = 0; i < length && things_printed < options->print_max; ++i) + if (length == -1) + { + unsigned long current_char = 1; + for (i = 0; current_char; ++i) + { + QUIT; + current_char = extract_unsigned_integer (string + i * width, width); + } + length = i; + } + + /* Arrange to iterate over the characters, in wchar_t form. */ + iter = make_wchar_iterator (string, length * width, encoding, width); + cleanup = make_cleanup_wchar_iterator (iter); + + /* WCHAR_BUF is the obstack we use to represent the string in + wchar_t form. 
*/ + obstack_init (&wchar_buf); + make_cleanup_obstack_free (&wchar_buf); + + while (!finished && things_printed < options->print_max) { - /* Position of the character we are examining - to see whether it is repeated. */ - unsigned int rep1; - /* Number of repetitions we have detected so far. */ - unsigned int reps; - unsigned long current_char; + int num_chars; + enum wchar_iterate_result result; + gdb_wchar_t *chars; + const gdb_byte *buf; + size_t buflen; QUIT; if (need_comma) { - fputs_filtered (", ", stream); + obstack_grow_wstr (&wchar_buf, LCST (", ")); need_comma = 0; } - current_char = extract_unsigned_integer (string + i * width, width); + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + /* We only look at repetitions when we were able to convert a + single character in isolation. This makes the code simpler + and probably does the sensible thing in the majority of + cases. */ + while (num_chars == 1) + { + /* Count the number of repetitions. */ + unsigned int reps = 0; + gdb_wchar_t current_char = chars[0]; + const gdb_byte *orig_buf = buf; + int orig_len = buflen; - rep1 = i + 1; - reps = 1; - while (rep1 < length - && extract_unsigned_integer (string + rep1 * width, width) - == current_char) + if (need_comma) + { + obstack_grow_wstr (&wchar_buf, LCST (", ")); + need_comma = 0; + } + + while (num_chars == 1 && current_char == chars[0]) + { + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + ++reps; + } + + /* Emit CURRENT_CHAR according to the repetition count and + options. 
*/ + if (reps > options->repeat_count_threshold) + { + if (in_quotes) + { + if (options->inspect_it) + obstack_grow_wstr (&wchar_buf, LCST ("\\\", ")); + else + obstack_grow_wstr (&wchar_buf, LCST ("\", ")); + in_quotes = 0; + } + obstack_grow_wstr (&wchar_buf, LCST ("'")); + need_escape = 0; + print_wchar (current_char, orig_buf, orig_len, width, + &wchar_buf, '\'', &need_escape); + obstack_grow_wstr (&wchar_buf, LCST ("'")); + { + /* Painful gyrations. */ + int j; + char *s = xstrprintf (_(" <repeats %u times>"), reps); + for (j = 0; s[j]; ++j) + { + gdb_wchar_t w = gdb_btowc (s[j]); + obstack_grow (&wchar_buf, &w, sizeof (gdb_wchar_t)); + } + xfree (s); + } + things_printed += options->repeat_count_threshold; + need_comma = 1; + } + else + { + /* Saw the character one or more times, but fewer than + the repetition threshold. */ + if (!in_quotes) + { + if (options->inspect_it) + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); + else + obstack_grow_wstr (&wchar_buf, LCST ("\"")); + in_quotes = 1; + need_escape = 0; + } + + while (reps-- > 0) + { + print_wchar (current_char, orig_buf, orig_len, width, + &wchar_buf, '"', &need_escape); + ++things_printed; + } + } + } + + /* NUM_CHARS and the other outputs from wchar_iterate are valid + here regardless of which branch was taken above. */ + if (num_chars < 0) { - ++rep1; - ++reps; + /* Hit EOF. 
*/ + finished = 1; + break; } - if (reps > options->repeat_count_threshold) + switch (result) { - if (in_quotes) + case wchar_iterate_invalid: + if (!in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\", ", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); else - fputs_filtered ("\", ", stream); - in_quotes = 0; + obstack_grow_wstr (&wchar_buf, LCST ("\"")); + in_quotes = 1; } - LA_PRINT_CHAR (current_char, stream); - fprintf_filtered (stream, _(" <repeats %u times>"), reps); - i = rep1 - 1; - things_printed += options->repeat_count_threshold; - need_comma = 1; - } - else - { - if (!in_quotes) + need_escape = 0; + print_wchar (gdb_WEOF, buf, buflen, width, &wchar_buf, + '"', &need_escape); + break; + + case wchar_iterate_incomplete: + if (in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\",")); else - fputs_filtered ("\"", stream); - in_quotes = 1; + obstack_grow_wstr (&wchar_buf, LCST ("\",")); + in_quotes = 0; } - LA_EMIT_CHAR (current_char, stream, '"'); - ++things_printed; + obstack_grow_wstr (&wchar_buf, LCST (" <incomplete sequence ")); + print_wchar (gdb_WEOF, buf, buflen, width, &wchar_buf, + 0, &need_escape); + obstack_grow_wstr (&wchar_buf, LCST (">")); + finished = 1; + break; } } @@ -173,13 +550,27 @@ c_printstr (struct ui_file *stream, const gdb_byte *string, if (in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); else - fputs_filtered ("\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\"")); } - if (force_ellipses || i < length) - fputs_filtered ("...", stream); + if (force_ellipses || !finished) + obstack_grow_wstr (&wchar_buf, LCST ("...")); + + /* OUTPUT is where we collect `char's for printing. 
*/ + obstack_init (&output); + make_cleanup_obstack_free (&output); + + convert_between_encodings ("wchar_t", host_charset (), + obstack_base (&wchar_buf), + obstack_object_size (&wchar_buf), + 1, &output, translit_char); + obstack_1grow (&output, '\0'); + + fputs_filtered (obstack_base (&output), stream); + + do_cleanups (cleanup); } /* Obtain a C string from the inferior storing it in a newly allocated @@ -298,7 +689,285 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length, } -/* Preprocessing and parsing C and C++ expressions. */ +/* Evaluating C and C++ expressions. */ + +/* Convert a UCN. The digits of the UCN start at P and extend no + farther than LIMIT. DEST_CHARSET is the name of the character set + into which the UCN should be converted. The results are written to + OUTPUT. LENGTH is the maximum length of the UCN, either 4 or 8. + Returns a pointer to just after the final digit of the UCN. */ + +static char * +convert_ucn (char *p, char *limit, const char *dest_charset, + struct obstack *output, int length) +{ + unsigned long result = 0; + gdb_byte data[4]; + int i; + + for (i = 0; i < length && p < limit && isxdigit (*p); ++i, ++p) + result = (result << 4) + host_hex_value (*p); + + for (i = 3; i >= 0; --i) + { + data[i] = result & 0xff; + result >>= 8; + } + + convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output, + translit_none); + + return p; +} + +/* Emit a character, VALUE, which was specified numerically, to + OUTPUT. TYPE is the target character type. */ + +static void +emit_numeric_character (struct type *type, unsigned long value, + struct obstack *output) +{ + gdb_byte *buffer; + + buffer = alloca (TYPE_LENGTH (type)); + pack_long (buffer, type, value); + obstack_grow (output, buffer, TYPE_LENGTH (type)); +} + +/* Convert an octal escape sequence. TYPE is the target character + type. The digits of the escape sequence begin at P and extend no + farther than LIMIT. The result is written to OUTPUT. 
Returns a + pointer to just after the final digit of the escape sequence. */ + +static char * +convert_octal (struct type *type, char *p, char *limit, struct obstack *output) +{ + unsigned long value = 0; + + while (p < limit && isdigit (*p) && *p != '8' && *p != '9') + { + value = 8 * value + host_hex_value (*p); + ++p; + } + + emit_numeric_character (type, value, output); + + return p; +} + +/* Convert a hex escape sequence. TYPE is the target character type. + The digits of the escape sequence begin at P and extend no farther + than LIMIT. The result is written to OUTPUT. Returns a pointer to + just after the final digit of the escape sequence. */ + +static char * +convert_hex (struct type *type, char *p, char *limit, struct obstack *output) +{ + unsigned long value = 0; + + while (p < limit && isxdigit (*p)) + { + value = 16 * value + host_hex_value (*p); + ++p; + } + + emit_numeric_character (type, value, output); + + return p; +} + +#define ADVANCE \ + do { \ + ++p; \ + if (p == limit) \ + error (_("Malformed escape sequence")); \ + } while (0) + +/* Convert an escape sequence to a target format. TYPE is the target + character type to use, and DEST_CHARSET is the name of the target + character set. The backslash of the escape sequence is at *P, and + the escape sequence will not extend past LIMIT. The results are + written to OUTPUT. Returns a pointer to just past the final + character of the escape sequence. */ + +static char * +convert_escape (struct type *type, const char *dest_charset, + char *p, char *limit, struct obstack *output) +{ + /* Skip the backslash. 
*/ + ADVANCE; + + switch (*p) + { + case '\\': + obstack_1grow (output, '\\'); + ++p; + break; + + case 'x': + ADVANCE; + if (!isxdigit (*p)) + error (_("\\x used with no following hex digits.")); + p = convert_hex (type, p, limit, output); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + p = convert_octal (type, p, limit, output); + break; + + case 'u': + case 'U': + { + int length = *p == 'u' ? 4 : 8; + ADVANCE; + if (!isxdigit (*p)) + error (_("\\u used with no following hex digits")); + p = convert_ucn (p, limit, dest_charset, output, length); + } + } + + return p; +} + +/* Given a single string from a (C-specific) OP_STRING list, convert + it to a target string, handling escape sequences specially. The + output is written to OUTPUT. DATA is the input string, which has + length LEN. DEST_CHARSET is the name of the target character set, + and TYPE is the type of target character to use. */ + +static void +parse_one_string (struct obstack *output, char *data, int len, + const char *dest_charset, struct type *type) +{ + char *limit; + + limit = data + len; + + while (data < limit) + { + char *p = data; + /* Look for next escape, or the end of the input. */ + while (p < limit && *p != '\\') + ++p; + /* If we saw a run of characters, convert them all. */ + if (p > data) + convert_between_encodings (host_charset (), dest_charset, + data, p - data, 1, output, translit_none); + /* If we saw an escape, convert it. */ + if (p < limit) + p = convert_escape (type, dest_charset, p, limit, output); + data = p; + } +} + +/* Expression evaluator for the C language family. Most operations + are delegated to evaluate_subexp_standard; see that function for a + description of the arguments. 
*/ + +static struct value * +evaluate_subexp_c (struct type *expect_type, struct expression *exp, + int *pos, enum noside noside) +{ + enum exp_opcode op = exp->elts[*pos].opcode; + + switch (op) + { + case OP_STRING: + { + int oplen, limit; + struct type *type; + struct obstack output; + struct cleanup *cleanup; + struct value *result; + enum c_string_type dest_type; + const char *dest_charset; + + obstack_init (&output); + cleanup = make_cleanup_obstack_free (&output); + + ++*pos; + oplen = longest_to_int (exp->elts[*pos].longconst); + + ++*pos; + limit = *pos + BYTES_TO_EXP_ELEM (oplen + 1); + dest_type + = (enum c_string_type) longest_to_int (exp->elts[*pos].longconst); + switch (dest_type & ~C_CHAR) + { + case C_STRING: + type = language_string_char_type (current_language, + current_gdbarch); + break; + case C_WIDE_STRING: + type = lookup_typename ("wchar_t", NULL, 0); + break; + case C_STRING_16: + type = lookup_typename ("char16_t", NULL, 0); + break; + case C_STRING_32: + type = lookup_typename ("char32_t", NULL, 0); + break; + default: + internal_error (__FILE__, __LINE__, "unhandled c_string_type"); + } + dest_charset = charset_for_string_type (dest_type); + + ++*pos; + while (*pos < limit) + { + int len; + + len = longest_to_int (exp->elts[*pos].longconst); + + ++*pos; + if (noside != EVAL_SKIP) + parse_one_string (&output, &exp->elts[*pos].string, len, + dest_charset, type); + *pos += BYTES_TO_EXP_ELEM (len); + } + + /* Skip the trailing length and opcode. */ + *pos += 2; + + if (noside == EVAL_SKIP) + return NULL; + + if ((dest_type & C_CHAR) != 0) + { + LONGEST value; + + if (obstack_object_size (&output) != TYPE_LENGTH (type)) + error (_("Could not convert character constant to target character set")); + value = unpack_long (type, obstack_base (&output)); + result = value_from_longest (type, value); + } + else + { + int i; + /* Write the terminating character. 
*/ + for (i = 0; i < TYPE_LENGTH (type); ++i) + obstack_1grow (&output, 0); + result = value_typed_string (obstack_base (&output), + obstack_object_size (&output), + type); + } + do_cleanups (cleanup); + return result; + } + break; + + default: + break; + } + return evaluate_subexp_standard (expect_type, exp, pos, noside); +} @@ -396,6 +1065,15 @@ c_language_arch_info (struct gdbarch *gdbarch, lai->bool_type_default = builtin->builtin_int; } +static const struct exp_descriptor exp_descriptor_c = +{ + print_subexp_standard, + operator_length_standard, + op_name_standard, + dump_subexp_body_standard, + evaluate_subexp_c +}; + const struct language_defn c_language_defn = { "c", /* Language name */ @@ -405,7 +1083,7 @@ const struct language_defn c_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -524,7 +1202,7 @@ const struct language_defn cplus_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -562,7 +1240,7 @@ const struct language_defn asm_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -605,7 +1283,7 @@ const struct language_defn minimal_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, diff --git a/gdb/c-lang.h b/gdb/c-lang.h index 06c5767..ba9d996 100644 --- a/gdb/c-lang.h +++ b/gdb/c-lang.h @@ -29,9 +29,38 @@ struct language_arch_info; #include "macroexp.h" -extern int c_parse (void); /* Defined in c-exp.y */ - -extern void c_error (char *); /* Defined in c-exp.y */ +/* The various kinds of C string and character. Note that these + values are chosen so that they may be or'd together in certain + ways. 
*/ +enum c_string_type + { + /* An ordinary string: "value". */ + C_STRING = 0, + /* A wide string: L"value". */ + C_WIDE_STRING = 1, + /* A 16-bit Unicode string: u"value". */ + C_STRING_16 = 2, + /* A 32-bit Unicode string: U"value". */ + C_STRING_32 = 3, + /* An ordinary char: 'v'. This can also be or'd with one of the + above to form the corresponding CHAR value from a STRING + value. */ + C_CHAR = 4, + /* A wide char: L'v'. */ + C_WIDE_CHAR = 5, + /* A 16-bit Unicode char: u'v'. */ + C_CHAR_16 = 6, + /* A 32-bit Unicode char: U'v'. */ + C_CHAR_32 = 7 + }; + +/* Defined in c-exp.y. */ + +extern int c_parse (void); + +extern void c_error (char *); + +extern int c_parse_escape (char **, struct obstack *); /* Defined in c-typeprint.c */ extern void c_print_type (struct type *, char *, struct ui_file *, int, @@ -48,10 +77,10 @@ extern int c_value_print (struct value *, struct ui_file *, /* These are in c-lang.c: */ -extern void c_printchar (int, struct ui_file *); +extern void c_printchar (int, struct type *, struct ui_file *); -extern void c_printstr (struct ui_file * stream, const gdb_byte *string, - unsigned int length, int width, +extern void c_printstr (struct ui_file * stream, struct type *elttype, + const gdb_byte *string, unsigned int length, int force_ellipses, const struct value_print_options *options); diff --git a/gdb/c-valprint.c b/gdb/c-valprint.c index 0b616f7..cfdf2c0 100644 --- a/gdb/c-valprint.c +++ b/gdb/c-valprint.c @@ -55,6 +55,18 @@ print_function_pointer_address (CORE_ADDR address, struct ui_file *stream, } +/* A helper for textual_element_type. This checks the name of the + typedef. This is bogus but it isn't apparent that the compiler + provides us the help we may need. */ + +static int +textual_name (const char *name) +{ + return (!strcmp (name, "wchar_t") + || !strcmp (name, "char16_t") + || !strcmp (name, "char32_t")); +} + /* Apply a heuristic to decide whether an array of TYPE or a pointer to TYPE should be printed as a textual string. 
Return non-zero if it should, or zero if it should be treated as an array of integers @@ -77,6 +89,15 @@ textual_element_type (struct type *type, char format) /* TYPE_CODE_CHAR is always textual. */ if (TYPE_CODE (true_type) == TYPE_CODE_CHAR) return 1; + /* Any other character-like types must be integral. */ + if (TYPE_CODE (true_type) != TYPE_CODE_INT) + return 0; + + /* Check the names of the type and the typedef. */ + if (TYPE_NAME (type) && textual_name (TYPE_NAME (type))) + return 1; + if (TYPE_NAME (true_type) && textual_name (TYPE_NAME (true_type))) + return 1; if (format == 's') { @@ -115,7 +136,8 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, { unsigned int i = 0; /* Number of characters printed */ unsigned len; - struct type *elttype; + struct type *elttype, *unresolved_elttype; + struct type *unresolved_type = type; unsigned eltlen; LONGEST val; CORE_ADDR addr; @@ -124,8 +146,9 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, switch (TYPE_CODE (type)) { case TYPE_CODE_ARRAY: - elttype = check_typedef (TYPE_TARGET_TYPE (type)); - if (TYPE_LENGTH (type) > 0 && TYPE_LENGTH (TYPE_TARGET_TYPE (type)) > 0) + unresolved_elttype = TYPE_TARGET_TYPE (type); + elttype = check_typedef (unresolved_elttype); + if (TYPE_LENGTH (type) > 0 && TYPE_LENGTH (unresolved_elttype) > 0) { eltlen = TYPE_LENGTH (elttype); len = TYPE_LENGTH (type) / eltlen; @@ -135,7 +158,7 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, } /* Print arrays of textual chars with a string syntax. */ - if (textual_element_type (elttype, options->format)) + if (textual_element_type (unresolved_elttype, options->format)) { /* If requested, look for the first null char and only print elements up to it. */ @@ -143,15 +166,19 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, { unsigned int temp_len; - /* Look for a NULL char. 
*/ for (temp_len = 0; - (valaddr + embedded_offset)[temp_len] - && temp_len < len && temp_len < options->print_max; - temp_len++); + (temp_len < len + && temp_len < options->print_max + && extract_unsigned_integer (valaddr + embedded_offset + + temp_len * eltlen, + eltlen) == 0); + ++temp_len) + ; len = temp_len; } - LA_PRINT_STRING (stream, valaddr + embedded_offset, len, eltlen, 0, options); + LA_PRINT_STRING (stream, unresolved_elttype, + valaddr + embedded_offset, len, 0, options); i = len; } else @@ -209,7 +236,8 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, print_function_pointer_address (addr, stream, options->addressprint); break; } - elttype = check_typedef (TYPE_TARGET_TYPE (type)); + unresolved_elttype = TYPE_TARGET_TYPE (type); + elttype = check_typedef (unresolved_elttype); { addr = unpack_pointer (type, valaddr + embedded_offset); print_unpacked_pointer: @@ -228,12 +256,11 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, /* For a pointer to a textual type, also print the string pointed to, unless pointer is null. */ - /* FIXME: need to handle wchar_t here... */ - if (textual_element_type (elttype, options->format) + if (textual_element_type (unresolved_elttype, options->format) && addr != 0) { - i = val_print_string (addr, -1, TYPE_LENGTH (elttype), stream, + i = val_print_string (unresolved_elttype, addr, -1, stream, options); } else if (cp_is_vtbl_member (type)) @@ -268,7 +295,7 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, } else { - wtype = TYPE_TARGET_TYPE (type); + wtype = unresolved_elttype; } vt_val = value_at (wtype, vt_address); common_val_print (vt_val, stream, recurse + 1, options, @@ -442,11 +469,11 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, Since we don't know whether the value is really intended to be used as an integer or a character, print the character equivalent as well. 
*/ - if (textual_element_type (type, options->format)) + if (textual_element_type (unresolved_type, options->format)) { fputs_filtered (" ", stream); LA_PRINT_CHAR ((unsigned char) unpack_long (type, valaddr + embedded_offset), - stream); + unresolved_type, stream); } } break; @@ -468,7 +495,7 @@ c_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, else fprintf_filtered (stream, "%d", (int) val); fputs_filtered (" ", stream); - LA_PRINT_CHAR ((unsigned char) val, stream); + LA_PRINT_CHAR ((unsigned char) val, unresolved_type, stream); } break; @@ -540,7 +567,7 @@ int c_value_print (struct value *val, struct ui_file *stream, const struct value_print_options *options) { - struct type *type, *real_type; + struct type *type, *real_type, *val_type; int full, top, using_enc; struct value_print_options opts = *options; @@ -553,7 +580,11 @@ c_value_print (struct value *val, struct ui_file *stream, C++: if it is a member pointer, we will take care of that when we print it. */ - type = check_typedef (value_type (val)); + /* Preserve the original type before stripping typedefs. We prefer + to pass down the original type when possible, but for local + checks it is better to look past the typedefs. */ + val_type = value_type (val); + type = check_typedef (val_type); if (TYPE_CODE (type) == TYPE_CODE_PTR || TYPE_CODE (type) == TYPE_CODE_REF) @@ -561,11 +592,12 @@ c_value_print (struct value *val, struct ui_file *stream, /* Hack: remove (char *) for char strings. Their type is indicated by the quoted string anyway. (Don't use textual_element_type here; quoted strings - are always exactly (char *). */ - if (TYPE_CODE (type) == TYPE_CODE_PTR - && TYPE_NAME (type) == NULL - && TYPE_NAME (TYPE_TARGET_TYPE (type)) != NULL - && strcmp (TYPE_NAME (TYPE_TARGET_TYPE (type)), "char") == 0) + are always exactly (char *), (wchar_t *), or the like. 
*/ + if (TYPE_CODE (val_type) == TYPE_CODE_PTR + && TYPE_NAME (val_type) == NULL + && TYPE_NAME (TYPE_TARGET_TYPE (val_type)) != NULL + && (strcmp (TYPE_NAME (TYPE_TARGET_TYPE (val_type)), "char") == 0 + || textual_name (TYPE_NAME (TYPE_TARGET_TYPE (val_type))))) { /* Print nothing */ } @@ -608,6 +640,7 @@ c_value_print (struct value *val, struct ui_file *stream, } type_print (type, "", stream, -1); fprintf_filtered (stream, ") "); + val_type = type; } else { @@ -653,7 +686,7 @@ c_value_print (struct value *val, struct ui_file *stream, /* Otherwise, we end up at the return outside this "if" */ } - return val_print (type, value_contents_all (val), + return val_print (val_type, value_contents_all (val), value_embedded_offset (val), VALUE_ADDRESS (val) + value_offset (val), stream, 0, &opts, current_language); diff --git a/gdb/charset-list.h b/gdb/charset-list.h new file mode 100644 index 0000000..59c64c5 --- /dev/null +++ b/gdb/charset-list.h @@ -0,0 +1,1190 @@ +/* List of character set names for GDB. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Note that the first entry must always be "auto". + The remaining entries were created by running this script: + + iconv -l | sed -e '/[/]...*$/d' -e 's@^\(.*\)//$@"\1", \\@' + + .. and then removing the final backslash. 
It would be nice to + separate narrow and wide character sets, but there is no good way + to do that. */ +#define DEFAULT_CHARSET_NAMES \ +"auto", \ +"437", \ +"500", \ +"500V1", \ +"850", \ +"851", \ +"852", \ +"855", \ +"856", \ +"857", \ +"860", \ +"861", \ +"862", \ +"863", \ +"864", \ +"865", \ +"866", \ +"866NAV", \ +"869", \ +"874", \ +"904", \ +"1026", \ +"1046", \ +"1047", \ +"8859_1", \ +"8859_2", \ +"8859_3", \ +"8859_4", \ +"8859_5", \ +"8859_6", \ +"8859_7", \ +"8859_8", \ +"8859_9", \ +"10646-1:1993", \ +"ANSI_X3.4-1968", \ +"ANSI_X3.4-1986", \ +"ANSI_X3.4", \ +"ANSI_X3.110-1983", \ +"ANSI_X3.110", \ +"ARABIC", \ +"ARABIC7", \ +"ARMSCII-8", \ +"ASCII", \ +"ASMO-708", \ +"ASMO_449", \ +"BALTIC", \ +"BIG-5", \ +"BIG-FIVE", \ +"BIG5-HKSCS", \ +"BIG5", \ +"BIG5HKSCS", \ +"BIGFIVE", \ +"BRF", \ +"BS_4730", \ +"CA", \ +"CN-BIG5", \ +"CN-GB", \ +"CN", \ +"CP-AR", \ +"CP-GR", \ +"CP-HU", \ +"CP037", \ +"CP038", \ +"CP273", \ +"CP274", \ +"CP275", \ +"CP278", \ +"CP280", \ +"CP281", \ +"CP282", \ +"CP284", \ +"CP285", \ +"CP290", \ +"CP297", \ +"CP367", \ +"CP420", \ +"CP423", \ +"CP424", \ +"CP437", \ +"CP500", \ +"CP737", \ +"CP775", \ +"CP803", \ +"CP813", \ +"CP819", \ +"CP850", \ +"CP851", \ +"CP852", \ +"CP855", \ +"CP856", \ +"CP857", \ +"CP860", \ +"CP861", \ +"CP862", \ +"CP863", \ +"CP864", \ +"CP865", \ +"CP866", \ +"CP866NAV", \ +"CP868", \ +"CP869", \ +"CP870", \ +"CP871", \ +"CP874", \ +"CP875", \ +"CP880", \ +"CP891", \ +"CP901", \ +"CP902", \ +"CP903", \ +"CP904", \ +"CP905", \ +"CP912", \ +"CP915", \ +"CP916", \ +"CP918", \ +"CP920", \ +"CP921", \ +"CP922", \ +"CP930", \ +"CP932", \ +"CP933", \ +"CP935", \ +"CP936", \ +"CP937", \ +"CP939", \ +"CP949", \ +"CP950", \ +"CP1004", \ +"CP1008", \ +"CP1025", \ +"CP1026", \ +"CP1046", \ +"CP1047", \ +"CP1070", \ +"CP1079", \ +"CP1081", \ +"CP1084", \ +"CP1089", \ +"CP1097", \ +"CP1112", \ +"CP1122", \ +"CP1123", \ +"CP1124", \ +"CP1125", \ +"CP1129", \ +"CP1130", \ +"CP1132", \ +"CP1133", \ +"CP1137", 
\ +"CP1140", \ +"CP1141", \ +"CP1142", \ +"CP1143", \ +"CP1144", \ +"CP1145", \ +"CP1146", \ +"CP1147", \ +"CP1148", \ +"CP1149", \ +"CP1153", \ +"CP1154", \ +"CP1155", \ +"CP1156", \ +"CP1157", \ +"CP1158", \ +"CP1160", \ +"CP1161", \ +"CP1162", \ +"CP1163", \ +"CP1164", \ +"CP1166", \ +"CP1167", \ +"CP1250", \ +"CP1251", \ +"CP1252", \ +"CP1253", \ +"CP1254", \ +"CP1255", \ +"CP1256", \ +"CP1257", \ +"CP1258", \ +"CP1282", \ +"CP1361", \ +"CP1364", \ +"CP1371", \ +"CP1388", \ +"CP1390", \ +"CP1399", \ +"CP4517", \ +"CP4899", \ +"CP4909", \ +"CP4971", \ +"CP5347", \ +"CP9030", \ +"CP9066", \ +"CP9448", \ +"CP10007", \ +"CP12712", \ +"CP16804", \ +"CPIBM861", \ +"CSA7-1", \ +"CSA7-2", \ +"CSASCII", \ +"CSA_T500-1983", \ +"CSA_T500", \ +"CSA_Z243.4-1985-1", \ +"CSA_Z243.4-1985-2", \ +"CSA_Z243.419851", \ +"CSA_Z243.419852", \ +"CSDECMCS", \ +"CSEBCDICATDE", \ +"CSEBCDICATDEA", \ +"CSEBCDICCAFR", \ +"CSEBCDICDKNO", \ +"CSEBCDICDKNOA", \ +"CSEBCDICES", \ +"CSEBCDICESA", \ +"CSEBCDICESS", \ +"CSEBCDICFISE", \ +"CSEBCDICFISEA", \ +"CSEBCDICFR", \ +"CSEBCDICIT", \ +"CSEBCDICPT", \ +"CSEBCDICUK", \ +"CSEBCDICUS", \ +"CSEUCKR", \ +"CSEUCPKDFMTJAPANESE", \ +"CSGB2312", \ +"CSHPROMAN8", \ +"CSIBM037", \ +"CSIBM038", \ +"CSIBM273", \ +"CSIBM274", \ +"CSIBM275", \ +"CSIBM277", \ +"CSIBM278", \ +"CSIBM280", \ +"CSIBM281", \ +"CSIBM284", \ +"CSIBM285", \ +"CSIBM290", \ +"CSIBM297", \ +"CSIBM420", \ +"CSIBM423", \ +"CSIBM424", \ +"CSIBM500", \ +"CSIBM803", \ +"CSIBM851", \ +"CSIBM855", \ +"CSIBM856", \ +"CSIBM857", \ +"CSIBM860", \ +"CSIBM863", \ +"CSIBM864", \ +"CSIBM865", \ +"CSIBM866", \ +"CSIBM868", \ +"CSIBM869", \ +"CSIBM870", \ +"CSIBM871", \ +"CSIBM880", \ +"CSIBM891", \ +"CSIBM901", \ +"CSIBM902", \ +"CSIBM903", \ +"CSIBM904", \ +"CSIBM905", \ +"CSIBM918", \ +"CSIBM921", \ +"CSIBM922", \ +"CSIBM930", \ +"CSIBM932", \ +"CSIBM933", \ +"CSIBM935", \ +"CSIBM937", \ +"CSIBM939", \ +"CSIBM943", \ +"CSIBM1008", \ +"CSIBM1025", \ +"CSIBM1026", \ +"CSIBM1097", \ +"CSIBM1112", \ 
+"CSIBM1122", \ +"CSIBM1123", \ +"CSIBM1124", \ +"CSIBM1129", \ +"CSIBM1130", \ +"CSIBM1132", \ +"CSIBM1133", \ +"CSIBM1137", \ +"CSIBM1140", \ +"CSIBM1141", \ +"CSIBM1142", \ +"CSIBM1143", \ +"CSIBM1144", \ +"CSIBM1145", \ +"CSIBM1146", \ +"CSIBM1147", \ +"CSIBM1148", \ +"CSIBM1149", \ +"CSIBM1153", \ +"CSIBM1154", \ +"CSIBM1155", \ +"CSIBM1156", \ +"CSIBM1157", \ +"CSIBM1158", \ +"CSIBM1160", \ +"CSIBM1161", \ +"CSIBM1163", \ +"CSIBM1164", \ +"CSIBM1166", \ +"CSIBM1167", \ +"CSIBM1364", \ +"CSIBM1371", \ +"CSIBM1388", \ +"CSIBM1390", \ +"CSIBM1399", \ +"CSIBM4517", \ +"CSIBM4899", \ +"CSIBM4909", \ +"CSIBM4971", \ +"CSIBM5347", \ +"CSIBM9030", \ +"CSIBM9066", \ +"CSIBM9448", \ +"CSIBM12712", \ +"CSIBM16804", \ +"CSIBM11621162", \ +"CSISO4UNITEDKINGDOM", \ +"CSISO10SWEDISH", \ +"CSISO11SWEDISHFORNAMES", \ +"CSISO14JISC6220RO", \ +"CSISO15ITALIAN", \ +"CSISO16PORTUGESE", \ +"CSISO17SPANISH", \ +"CSISO18GREEK7OLD", \ +"CSISO19LATINGREEK", \ +"CSISO21GERMAN", \ +"CSISO25FRENCH", \ +"CSISO27LATINGREEK1", \ +"CSISO49INIS", \ +"CSISO50INIS8", \ +"CSISO51INISCYRILLIC", \ +"CSISO58GB1988", \ +"CSISO60DANISHNORWEGIAN", \ +"CSISO60NORWEGIAN1", \ +"CSISO61NORWEGIAN2", \ +"CSISO69FRENCH", \ +"CSISO84PORTUGUESE2", \ +"CSISO85SPANISH2", \ +"CSISO86HUNGARIAN", \ +"CSISO88GREEK7", \ +"CSISO89ASMO449", \ +"CSISO90", \ +"CSISO92JISC62991984B", \ +"CSISO99NAPLPS", \ +"CSISO103T618BIT", \ +"CSISO111ECMACYRILLIC", \ +"CSISO121CANADIAN1", \ +"CSISO122CANADIAN2", \ +"CSISO139CSN369103", \ +"CSISO141JUSIB1002", \ +"CSISO143IECP271", \ +"CSISO150", \ +"CSISO150GREEKCCITT", \ +"CSISO151CUBA", \ +"CSISO153GOST1976874", \ +"CSISO646DANISH", \ +"CSISO2022CN", \ +"CSISO2022JP", \ +"CSISO2022JP2", \ +"CSISO2022KR", \ +"CSISO2033", \ +"CSISO5427CYRILLIC", \ +"CSISO5427CYRILLIC1981", \ +"CSISO5428GREEK", \ +"CSISO10367BOX", \ +"CSISOLATIN1", \ +"CSISOLATIN2", \ +"CSISOLATIN3", \ +"CSISOLATIN4", \ +"CSISOLATIN5", \ +"CSISOLATIN6", \ +"CSISOLATINARABIC", \ +"CSISOLATINCYRILLIC", \ 
+"CSISOLATINGREEK", \ +"CSISOLATINHEBREW", \ +"CSKOI8R", \ +"CSKSC5636", \ +"CSMACINTOSH", \ +"CSNATSDANO", \ +"CSNATSSEFI", \ +"CSN_369103", \ +"CSPC8CODEPAGE437", \ +"CSPC775BALTIC", \ +"CSPC850MULTILINGUAL", \ +"CSPC862LATINHEBREW", \ +"CSPCP852", \ +"CSSHIFTJIS", \ +"CSUCS4", \ +"CSUNICODE", \ +"CSWINDOWS31J", \ +"CUBA", \ +"CWI-2", \ +"CWI", \ +"CYRILLIC", \ +"DE", \ +"DEC-MCS", \ +"DEC", \ +"DECMCS", \ +"DIN_66003", \ +"DK", \ +"DS2089", \ +"DS_2089", \ +"E13B", \ +"EBCDIC-AT-DE-A", \ +"EBCDIC-AT-DE", \ +"EBCDIC-BE", \ +"EBCDIC-BR", \ +"EBCDIC-CA-FR", \ +"EBCDIC-CP-AR1", \ +"EBCDIC-CP-AR2", \ +"EBCDIC-CP-BE", \ +"EBCDIC-CP-CA", \ +"EBCDIC-CP-CH", \ +"EBCDIC-CP-DK", \ +"EBCDIC-CP-ES", \ +"EBCDIC-CP-FI", \ +"EBCDIC-CP-FR", \ +"EBCDIC-CP-GB", \ +"EBCDIC-CP-GR", \ +"EBCDIC-CP-HE", \ +"EBCDIC-CP-IS", \ +"EBCDIC-CP-IT", \ +"EBCDIC-CP-NL", \ +"EBCDIC-CP-NO", \ +"EBCDIC-CP-ROECE", \ +"EBCDIC-CP-SE", \ +"EBCDIC-CP-TR", \ +"EBCDIC-CP-US", \ +"EBCDIC-CP-WT", \ +"EBCDIC-CP-YU", \ +"EBCDIC-CYRILLIC", \ +"EBCDIC-DK-NO-A", \ +"EBCDIC-DK-NO", \ +"EBCDIC-ES-A", \ +"EBCDIC-ES-S", \ +"EBCDIC-ES", \ +"EBCDIC-FI-SE-A", \ +"EBCDIC-FI-SE", \ +"EBCDIC-FR", \ +"EBCDIC-GREEK", \ +"EBCDIC-INT", \ +"EBCDIC-INT1", \ +"EBCDIC-IS-FRISS", \ +"EBCDIC-IT", \ +"EBCDIC-JP-E", \ +"EBCDIC-JP-KANA", \ +"EBCDIC-PT", \ +"EBCDIC-UK", \ +"EBCDIC-US", \ +"EBCDICATDE", \ +"EBCDICATDEA", \ +"EBCDICCAFR", \ +"EBCDICDKNO", \ +"EBCDICDKNOA", \ +"EBCDICES", \ +"EBCDICESA", \ +"EBCDICESS", \ +"EBCDICFISE", \ +"EBCDICFISEA", \ +"EBCDICFR", \ +"EBCDICISFRISS", \ +"EBCDICIT", \ +"EBCDICPT", \ +"EBCDICUK", \ +"EBCDICUS", \ +"ECMA-114", \ +"ECMA-118", \ +"ECMA-128", \ +"ECMA-CYRILLIC", \ +"ECMACYRILLIC", \ +"ELOT_928", \ +"ES", \ +"ES2", \ +"EUC-CN", \ +"EUC-JISX0213", \ +"EUC-JP-MS", \ +"EUC-JP", \ +"EUC-KR", \ +"EUC-TW", \ +"EUCCN", \ +"EUCJP-MS", \ +"EUCJP-OPEN", \ +"EUCJP-WIN", \ +"EUCJP", \ +"EUCKR", \ +"EUCTW", \ +"FI", \ +"FR", \ +"GB", \ +"GB2312", \ +"GB13000", \ +"GB18030", \ +"GBK", \ +"GB_1988-80", \ 
+"GB_198880", \ +"GEORGIAN-ACADEMY", \ +"GEORGIAN-PS", \ +"GOST_19768-74", \ +"GOST_19768", \ +"GOST_1976874", \ +"GREEK-CCITT", \ +"GREEK", \ +"GREEK7-OLD", \ +"GREEK7", \ +"GREEK7OLD", \ +"GREEK8", \ +"GREEKCCITT", \ +"HEBREW", \ +"HP-GREEK8", \ +"HP-ROMAN8", \ +"HP-ROMAN9", \ +"HP-THAI8", \ +"HP-TURKISH8", \ +"HPGREEK8", \ +"HPROMAN8", \ +"HPROMAN9", \ +"HPTHAI8", \ +"HPTURKISH8", \ +"HU", \ +"IBM-803", \ +"IBM-856", \ +"IBM-901", \ +"IBM-902", \ +"IBM-921", \ +"IBM-922", \ +"IBM-930", \ +"IBM-932", \ +"IBM-933", \ +"IBM-935", \ +"IBM-937", \ +"IBM-939", \ +"IBM-943", \ +"IBM-1008", \ +"IBM-1025", \ +"IBM-1046", \ +"IBM-1047", \ +"IBM-1097", \ +"IBM-1112", \ +"IBM-1122", \ +"IBM-1123", \ +"IBM-1124", \ +"IBM-1129", \ +"IBM-1130", \ +"IBM-1132", \ +"IBM-1133", \ +"IBM-1137", \ +"IBM-1140", \ +"IBM-1141", \ +"IBM-1142", \ +"IBM-1143", \ +"IBM-1144", \ +"IBM-1145", \ +"IBM-1146", \ +"IBM-1147", \ +"IBM-1148", \ +"IBM-1149", \ +"IBM-1153", \ +"IBM-1154", \ +"IBM-1155", \ +"IBM-1156", \ +"IBM-1157", \ +"IBM-1158", \ +"IBM-1160", \ +"IBM-1161", \ +"IBM-1162", \ +"IBM-1163", \ +"IBM-1164", \ +"IBM-1166", \ +"IBM-1167", \ +"IBM-1364", \ +"IBM-1371", \ +"IBM-1388", \ +"IBM-1390", \ +"IBM-1399", \ +"IBM-4517", \ +"IBM-4899", \ +"IBM-4909", \ +"IBM-4971", \ +"IBM-5347", \ +"IBM-9030", \ +"IBM-9066", \ +"IBM-9448", \ +"IBM-12712", \ +"IBM-16804", \ +"IBM037", \ +"IBM038", \ +"IBM256", \ +"IBM273", \ +"IBM274", \ +"IBM275", \ +"IBM277", \ +"IBM278", \ +"IBM280", \ +"IBM281", \ +"IBM284", \ +"IBM285", \ +"IBM290", \ +"IBM297", \ +"IBM367", \ +"IBM420", \ +"IBM423", \ +"IBM424", \ +"IBM437", \ +"IBM500", \ +"IBM775", \ +"IBM803", \ +"IBM813", \ +"IBM819", \ +"IBM848", \ +"IBM850", \ +"IBM851", \ +"IBM852", \ +"IBM855", \ +"IBM856", \ +"IBM857", \ +"IBM860", \ +"IBM861", \ +"IBM862", \ +"IBM863", \ +"IBM864", \ +"IBM865", \ +"IBM866", \ +"IBM866NAV", \ +"IBM868", \ +"IBM869", \ +"IBM870", \ +"IBM871", \ +"IBM874", \ +"IBM875", \ +"IBM880", \ +"IBM891", \ +"IBM901", \ +"IBM902", 
\ +"IBM903", \ +"IBM904", \ +"IBM905", \ +"IBM912", \ +"IBM915", \ +"IBM916", \ +"IBM918", \ +"IBM920", \ +"IBM921", \ +"IBM922", \ +"IBM930", \ +"IBM932", \ +"IBM933", \ +"IBM935", \ +"IBM937", \ +"IBM939", \ +"IBM943", \ +"IBM1004", \ +"IBM1008", \ +"IBM1025", \ +"IBM1026", \ +"IBM1046", \ +"IBM1047", \ +"IBM1089", \ +"IBM1097", \ +"IBM1112", \ +"IBM1122", \ +"IBM1123", \ +"IBM1124", \ +"IBM1129", \ +"IBM1130", \ +"IBM1132", \ +"IBM1133", \ +"IBM1137", \ +"IBM1140", \ +"IBM1141", \ +"IBM1142", \ +"IBM1143", \ +"IBM1144", \ +"IBM1145", \ +"IBM1146", \ +"IBM1147", \ +"IBM1148", \ +"IBM1149", \ +"IBM1153", \ +"IBM1154", \ +"IBM1155", \ +"IBM1156", \ +"IBM1157", \ +"IBM1158", \ +"IBM1160", \ +"IBM1161", \ +"IBM1162", \ +"IBM1163", \ +"IBM1164", \ +"IBM1166", \ +"IBM1167", \ +"IBM1364", \ +"IBM1371", \ +"IBM1388", \ +"IBM1390", \ +"IBM1399", \ +"IBM4517", \ +"IBM4899", \ +"IBM4909", \ +"IBM4971", \ +"IBM5347", \ +"IBM9030", \ +"IBM9066", \ +"IBM9448", \ +"IBM12712", \ +"IBM16804", \ +"IEC_P27-1", \ +"IEC_P271", \ +"INIS-8", \ +"INIS-CYRILLIC", \ +"INIS", \ +"INIS8", \ +"INISCYRILLIC", \ +"ISIRI-3342", \ +"ISIRI3342", \ +"ISO-2022-CN-EXT", \ +"ISO-2022-CN", \ +"ISO-2022-JP-2", \ +"ISO-2022-JP-3", \ +"ISO-2022-JP", \ +"ISO-2022-KR", \ +"ISO-8859-1", \ +"ISO-8859-2", \ +"ISO-8859-3", \ +"ISO-8859-4", \ +"ISO-8859-5", \ +"ISO-8859-6", \ +"ISO-8859-7", \ +"ISO-8859-8", \ +"ISO-8859-9", \ +"ISO-8859-9E", \ +"ISO-8859-10", \ +"ISO-8859-11", \ +"ISO-8859-13", \ +"ISO-8859-14", \ +"ISO-8859-15", \ +"ISO-8859-16", \ +"ISO-10646", \ +"ISO-CELTIC", \ +"ISO-IR-4", \ +"ISO-IR-6", \ +"ISO-IR-8-1", \ +"ISO-IR-9-1", \ +"ISO-IR-10", \ +"ISO-IR-11", \ +"ISO-IR-14", \ +"ISO-IR-15", \ +"ISO-IR-16", \ +"ISO-IR-17", \ +"ISO-IR-18", \ +"ISO-IR-19", \ +"ISO-IR-21", \ +"ISO-IR-25", \ +"ISO-IR-27", \ +"ISO-IR-37", \ +"ISO-IR-49", \ +"ISO-IR-50", \ +"ISO-IR-51", \ +"ISO-IR-54", \ +"ISO-IR-55", \ +"ISO-IR-57", \ +"ISO-IR-60", \ +"ISO-IR-61", \ +"ISO-IR-69", \ +"ISO-IR-84", \ +"ISO-IR-85", \ 
+"ISO-IR-86", \ +"ISO-IR-88", \ +"ISO-IR-89", \ +"ISO-IR-90", \ +"ISO-IR-92", \ +"ISO-IR-98", \ +"ISO-IR-99", \ +"ISO-IR-100", \ +"ISO-IR-101", \ +"ISO-IR-103", \ +"ISO-IR-109", \ +"ISO-IR-110", \ +"ISO-IR-111", \ +"ISO-IR-121", \ +"ISO-IR-122", \ +"ISO-IR-126", \ +"ISO-IR-127", \ +"ISO-IR-138", \ +"ISO-IR-139", \ +"ISO-IR-141", \ +"ISO-IR-143", \ +"ISO-IR-144", \ +"ISO-IR-148", \ +"ISO-IR-150", \ +"ISO-IR-151", \ +"ISO-IR-153", \ +"ISO-IR-155", \ +"ISO-IR-156", \ +"ISO-IR-157", \ +"ISO-IR-166", \ +"ISO-IR-179", \ +"ISO-IR-193", \ +"ISO-IR-197", \ +"ISO-IR-199", \ +"ISO-IR-203", \ +"ISO-IR-209", \ +"ISO-IR-226", \ +"ISO646-CA", \ +"ISO646-CA2", \ +"ISO646-CN", \ +"ISO646-CU", \ +"ISO646-DE", \ +"ISO646-DK", \ +"ISO646-ES", \ +"ISO646-ES2", \ +"ISO646-FI", \ +"ISO646-FR", \ +"ISO646-FR1", \ +"ISO646-GB", \ +"ISO646-HU", \ +"ISO646-IT", \ +"ISO646-JP-OCR-B", \ +"ISO646-JP", \ +"ISO646-KR", \ +"ISO646-NO", \ +"ISO646-NO2", \ +"ISO646-PT", \ +"ISO646-PT2", \ +"ISO646-SE", \ +"ISO646-SE2", \ +"ISO646-US", \ +"ISO646-YU", \ +"ISO2022CN", \ +"ISO2022CNEXT", \ +"ISO2022JP", \ +"ISO2022JP2", \ +"ISO2022KR", \ +"ISO6937", \ +"ISO8859-1", \ +"ISO8859-2", \ +"ISO8859-3", \ +"ISO8859-4", \ +"ISO8859-5", \ +"ISO8859-6", \ +"ISO8859-7", \ +"ISO8859-8", \ +"ISO8859-9", \ +"ISO8859-9E", \ +"ISO8859-10", \ +"ISO8859-11", \ +"ISO8859-13", \ +"ISO8859-14", \ +"ISO8859-15", \ +"ISO8859-16", \ +"ISO11548-1", \ +"ISO88591", \ +"ISO88592", \ +"ISO88593", \ +"ISO88594", \ +"ISO88595", \ +"ISO88596", \ +"ISO88597", \ +"ISO88598", \ +"ISO88599", \ +"ISO88599E", \ +"ISO885910", \ +"ISO885911", \ +"ISO885913", \ +"ISO885914", \ +"ISO885915", \ +"ISO885916", \ +"ISO_646.IRV:1991", \ +"ISO_2033-1983", \ +"ISO_2033", \ +"ISO_5427-EXT", \ +"ISO_5427", \ +"ISO_5427:1981", \ +"ISO_5427EXT", \ +"ISO_5428", \ +"ISO_5428:1980", \ +"ISO_6937-2", \ +"ISO_6937-2:1983", \ +"ISO_6937", \ +"ISO_6937:1992", \ +"ISO_8859-1", \ +"ISO_8859-1:1987", \ +"ISO_8859-2", \ +"ISO_8859-2:1987", \ +"ISO_8859-3", \ 
+"ISO_8859-3:1988", \ +"ISO_8859-4", \ +"ISO_8859-4:1988", \ +"ISO_8859-5", \ +"ISO_8859-5:1988", \ +"ISO_8859-6", \ +"ISO_8859-6:1987", \ +"ISO_8859-7", \ +"ISO_8859-7:1987", \ +"ISO_8859-7:2003", \ +"ISO_8859-8", \ +"ISO_8859-8:1988", \ +"ISO_8859-9", \ +"ISO_8859-9:1989", \ +"ISO_8859-9E", \ +"ISO_8859-10", \ +"ISO_8859-10:1992", \ +"ISO_8859-14", \ +"ISO_8859-14:1998", \ +"ISO_8859-15", \ +"ISO_8859-15:1998", \ +"ISO_8859-16", \ +"ISO_8859-16:2001", \ +"ISO_9036", \ +"ISO_10367-BOX", \ +"ISO_10367BOX", \ +"ISO_11548-1", \ +"ISO_69372", \ +"IT", \ +"JIS_C6220-1969-RO", \ +"JIS_C6229-1984-B", \ +"JIS_C62201969RO", \ +"JIS_C62291984B", \ +"JOHAB", \ +"JP-OCR-B", \ +"JP", \ +"JS", \ +"JUS_I.B1.002", \ +"KOI-7", \ +"KOI-8", \ +"KOI8-R", \ +"KOI8-RU", \ +"KOI8-T", \ +"KOI8-U", \ +"KOI8", \ +"KOI8R", \ +"KOI8U", \ +"KSC5636", \ +"L1", \ +"L2", \ +"L3", \ +"L4", \ +"L5", \ +"L6", \ +"L7", \ +"L8", \ +"L10", \ +"LATIN-9", \ +"LATIN-GREEK-1", \ +"LATIN-GREEK", \ +"LATIN1", \ +"LATIN2", \ +"LATIN3", \ +"LATIN4", \ +"LATIN5", \ +"LATIN6", \ +"LATIN7", \ +"LATIN8", \ +"LATIN10", \ +"LATINGREEK", \ +"LATINGREEK1", \ +"MAC-CENTRALEUROPE", \ +"MAC-CYRILLIC", \ +"MAC-IS", \ +"MAC-SAMI", \ +"MAC-UK", \ +"MAC", \ +"MACCYRILLIC", \ +"MACINTOSH", \ +"MACIS", \ +"MACUK", \ +"MACUKRAINIAN", \ +"MIK", \ +"MS-ANSI", \ +"MS-ARAB", \ +"MS-CYRL", \ +"MS-EE", \ +"MS-GREEK", \ +"MS-HEBR", \ +"MS-MAC-CYRILLIC", \ +"MS-TURK", \ +"MS932", \ +"MS936", \ +"MSCP949", \ +"MSCP1361", \ +"MSMACCYRILLIC", \ +"MSZ_7795.3", \ +"MS_KANJI", \ +"NAPLPS", \ +"NATS-DANO", \ +"NATS-SEFI", \ +"NATSDANO", \ +"NATSSEFI", \ +"NC_NC0010", \ +"NC_NC00-10", \ +"NC_NC00-10:81", \ +"NF_Z_62-010", \ +"NF_Z_62-010_(1973)", \ +"NF_Z_62-010_1973", \ +"NF_Z_62010", \ +"NF_Z_62010_1973", \ +"NO", \ +"NO2", \ +"NS_4551-1", \ +"NS_4551-2", \ +"NS_45511", \ +"NS_45512", \ +"OS2LATIN1", \ +"OSF00010001", \ +"OSF00010002", \ +"OSF00010003", \ +"OSF00010004", \ +"OSF00010005", \ +"OSF00010006", \ +"OSF00010007", \ 
+"OSF00010008", \ +"OSF00010009", \ +"OSF0001000A", \ +"OSF00010020", \ +"OSF00010100", \ +"OSF00010101", \ +"OSF00010102", \ +"OSF00010104", \ +"OSF00010105", \ +"OSF00010106", \ +"OSF00030010", \ +"OSF0004000A", \ +"OSF0005000A", \ +"OSF05010001", \ +"OSF100201A4", \ +"OSF100201A8", \ +"OSF100201B5", \ +"OSF100201F4", \ +"OSF100203B5", \ +"OSF1002011C", \ +"OSF1002011D", \ +"OSF1002035D", \ +"OSF1002035E", \ +"OSF1002035F", \ +"OSF1002036B", \ +"OSF1002037B", \ +"OSF10010001", \ +"OSF10010004", \ +"OSF10010006", \ +"OSF10020025", \ +"OSF10020111", \ +"OSF10020115", \ +"OSF10020116", \ +"OSF10020118", \ +"OSF10020122", \ +"OSF10020129", \ +"OSF10020352", \ +"OSF10020354", \ +"OSF10020357", \ +"OSF10020359", \ +"OSF10020360", \ +"OSF10020364", \ +"OSF10020365", \ +"OSF10020366", \ +"OSF10020367", \ +"OSF10020370", \ +"OSF10020387", \ +"OSF10020388", \ +"OSF10020396", \ +"OSF10020402", \ +"OSF10020417", \ +"PT", \ +"PT2", \ +"PT154", \ +"R8", \ +"R9", \ +"RK1048", \ +"ROMAN8", \ +"ROMAN9", \ +"RUSCII", \ +"SE", \ +"SE2", \ +"SEN_850200_B", \ +"SEN_850200_C", \ +"SHIFT-JIS", \ +"SHIFT_JIS", \ +"SHIFT_JISX0213", \ +"SJIS-OPEN", \ +"SJIS-WIN", \ +"SJIS", \ +"SS636127", \ +"STRK1048-2002", \ +"ST_SEV_358-88", \ +"T.61-8BIT", \ +"T.61", \ +"T.618BIT", \ +"TCVN-5712", \ +"TCVN", \ +"TCVN5712-1", \ +"TCVN5712-1:1993", \ +"THAI8", \ +"TIS-620", \ +"TIS620-0", \ +"TIS620.2529-1", \ +"TIS620.2533-0", \ +"TIS620", \ +"TS-5881", \ +"TSCII", \ +"TURKISH8", \ +"UCS-2", \ +"UCS-2BE", \ +"UCS-2LE", \ +"UCS-4", \ +"UCS-4BE", \ +"UCS-4LE", \ +"UCS2", \ +"UCS4", \ +"UHC", \ +"UJIS", \ +"UK", \ +"UNICODE", \ +"UNICODEBIG", \ +"UNICODELITTLE", \ +"US-ASCII", \ +"US", \ +"UTF-7", \ +"UTF-8", \ +"UTF-16", \ +"UTF-16BE", \ +"UTF-16LE", \ +"UTF-32", \ +"UTF-32BE", \ +"UTF-32LE", \ +"UTF7", \ +"UTF8", \ +"UTF16", \ +"UTF16BE", \ +"UTF16LE", \ +"UTF32", \ +"UTF32BE", \ +"UTF32LE", \ +"VISCII", \ +"WCHAR_T", \ +"WIN-SAMI-2", \ +"WINBALTRIM", \ +"WINDOWS-31J", \ +"WINDOWS-874", \ 
+"WINDOWS-936", \ +"WINDOWS-1250", \ +"WINDOWS-1251", \ +"WINDOWS-1252", \ +"WINDOWS-1253", \ +"WINDOWS-1254", \ +"WINDOWS-1255", \ +"WINDOWS-1256", \ +"WINDOWS-1257", \ +"WINDOWS-1258", \ +"WINSAMI2", \ +"WS2", \ +"YU", diff --git a/gdb/charset.c b/gdb/charset.c index 32eb9c3..5efb849 100644 --- a/gdb/charset.c +++ b/gdb/charset.c @@ -21,27 +21,31 @@ #include "charset.h" #include "gdbcmd.h" #include "gdb_assert.h" +#include "gdb_obstack.h" +#include "charset-list.h" +#include "vec.h" #include <stddef.h> #include "gdb_string.h" #include <ctype.h> -#ifdef HAVE_ICONV -#include <iconv.h> -#endif - /* How GDB's character set support works - GDB has two global settings: + GDB has three global settings: - The `current host character set' is the character set GDB should use in talking to the user, and which (hopefully) the user's - terminal knows how to display properly. + terminal knows how to display properly. Most users should not + change this. - The `current target character set' is the character set the program being debugged uses. + - The `current target wide character set' is the wide character set + the program being debugged uses, that is, the encoding used for + wchar_t. + There are commands to set each of these, and mechanisms for choosing reasonable default values. GDB has a global list of character sets that it can use as its host or target character @@ -57,118 +61,141 @@ characters the user enters in expressions (mostly host->target conversions), - and so on. - - Now, many of these operations are specific to a particular - host/target character set pair. If GDB supports N character sets, - there are N^2 possible pairs. This means that, the larger GDB's - repertoire of character sets gets, the more expensive it gets to add - new character sets. 
- - To make sure that GDB can do the right thing for every possible - pairing of host and target character set, while still allowing - GDB's repertoire to scale, we use a two-tiered approach: - - - We maintain a global table of "translations" --- groups of - functions specific to a particular pair of character sets. - - - However, a translation can be incomplete: some functions can be - omitted. Where there is not a translation to specify exactly - what function to use, we provide reasonable defaults. The - default behaviors try to use the "iconv" library functions, which - support a wide range of character sets. However, even if iconv - is not available, there are fallbacks to support trivial - translations: when the host and target character sets are the - same. */ + and so on. + + To avoid excessive code duplication and maintenance efforts, + GDB simply requires a capable iconv function. Users on platforms + without a suitable iconv can use the GNU iconv library. */ -/* The character set and translation structures. */ - +#ifdef PHONY_ICONV -/* A character set GDB knows about. GDB only supports character sets - with stateless encodings, in which every character is one byte - long. */ -struct charset { +/* Provide a phony iconv that does as little as possible. Also, + arrange for there to be a single available character set. */ - /* A singly-linked list of all known charsets. */ - struct charset *next; +#undef GDB_DEFAULT_HOST_CHARSET +#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" +#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" +#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" +#undef DEFAULT_CHARSET_NAMES +#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , + +#undef iconv_t +#define iconv_t int +#undef iconv_open +#undef iconv +#undef iconv_close + +iconv_t +iconv_open (const char *to, const char *from) +{ + /* We allow conversions from UCS-4BE, wchar_t, and the host charset. + We allow conversions to wchar_t and the host charset. 
*/ + if (strcmp (from, "UCS-4BE") && strcmp (from, "wchar_t") + && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) + return -1; + if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) + return -1; - /* The name of the character set. Comparisons on character set - names are case-sensitive. */ - const char *name; + /* Return 1 if we are converting from UCS-4BE, 0 otherwise. This is + used as a flag in calls to iconv. */ + return !strcmp (from, "UCS-4BE"); +} - /* Non-zero iff this character set can be used as a host character - set. At present, GDB basically assumes that the host character - set is a superset of ASCII. */ - int valid_host_charset; +int +iconv_close (iconv_t arg) +{ + return 0; +} - /* Pointers to charset-specific functions that depend only on a - single character set, and data pointers to pass to them. */ - int (*host_char_print_literally) (void *baton, - int host_char); - void *host_char_print_literally_baton; +size_t +iconv (iconv_t ucs_flag, char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + if (ucs_flag) + { + while (*inbytesleft >= 4) + { + size_t j; + unsigned long c = 0; + + for (j = 0; j < 4; ++j) + { + c <<= 8; + c += (*inbuf)[j] & 0xff; + } + + if (c >= 256) + { + errno = EILSEQ; + return -1; + } + **outbuf = c & 0xff; + ++*outbuf; + --*outbytesleft; + + ++*inbuf; + *inbytesleft -= 4; + } + if (*inbytesleft < 4) + { + errno = EINVAL; + return -1; + } + } + else + { + /* In all other cases we simply copy input bytes to the + output. */ + size_t amt = *inbytesleft; + if (amt > *outbytesleft) + amt = *outbytesleft; + memcpy (*outbuf, *inbuf, amt); + *inbuf += amt; + *outbuf += amt; + *inbytesleft -= amt; + *outbytesleft -= amt; + } - int (*target_char_to_control_char) (void *baton, - int target_char, - int *target_ctrl_char); - void *target_char_to_control_char_baton; -}; + if (*inbytesleft) + { + errno = E2BIG; + return -1; + } + /* The number of non-reversible conversions -- but they were all + reversible. 
*/ + return 0; +} -/* A translation from one character set to another. */ -struct translation { - - /* A singly-linked list of all known translations. */ - struct translation *next; - - /* This structure describes functions going from the FROM character - set to the TO character set. Comparisons on character set names - are case-sensitive. */ - const char *from, *to; - - /* Pointers to translation-specific functions, and data pointers to - pass to them. These pointers can be zero, indicating that GDB - should fall back on the default behavior. We hope the default - behavior will be correct for many from/to pairs, reducing the - number of translations that need to be registered explicitly. */ - - /* TARGET_CHAR is in the `from' charset. - Returns a string in the `to' charset. */ - const char *(*c_target_char_has_backslash_escape) (void *baton, - int target_char); - void *c_target_char_has_backslash_escape_baton; - - /* HOST_CHAR is in the `from' charset. - TARGET_CHAR points to a char in the `to' charset. */ - int (*c_parse_backslash) (void *baton, int host_char, int *target_char); - void *c_parse_backslash_baton; - - /* This is used for the host_char_to_target and target_char_to_host - functions. */ - int (*convert_char) (void *baton, int from, int *to); - void *convert_char_baton; -}; +#endif /* The global lists of character sets and translations. 
*/ -#ifndef GDB_DEFAULT_HOST_CHARSET -#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" -#endif - #ifndef GDB_DEFAULT_TARGET_CHARSET #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" #endif -static const char *host_charset_name = GDB_DEFAULT_HOST_CHARSET; +#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET +#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UCS-4" +#endif + +static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; +static const char *host_charset_name = "auto"; static void show_host_charset_name (struct ui_file *file, int from_tty, struct cmd_list_element *c, const char *value) { - fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); + if (!strcmp (value, "auto")) + fprintf_filtered (file, + _("The host character set is \"auto; currently %s\".\n"), + auto_host_charset_name); + else + fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); } static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET; @@ -180,1060 +207,547 @@ show_target_charset_name (struct ui_file *file, int from_tty, value); } - -static const char *host_charset_enum[] = +static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET; +static void +show_target_wide_charset_name (struct ui_file *file, int from_tty, + struct cmd_list_element *c, const char *value) { - "ASCII", - "ISO-8859-1", - 0 -}; + fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), + value); +} -static const char *target_charset_enum[] = +static const char *default_charset_names[] = { - "ASCII", - "ISO-8859-1", - "EBCDIC-US", - "IBM1047", + DEFAULT_CHARSET_NAMES 0 }; -/* The global list of all the charsets GDB knows about. */ -static struct charset *all_charsets; +static const char **charset_enum; + +/* If the target wide character set has big- or little-endian + variants, these are the corresponding names. 
*/ +static const char *target_wide_charset_be_name; +static const char *target_wide_charset_le_name; -static void -register_charset (struct charset *cs) -{ - struct charset **ptr; - - /* Put the new charset on the end, so that the list ends up in the - same order as the registrations in the _initialize function. */ - for (ptr = &all_charsets; *ptr; ptr = &(*ptr)->next) - ; - - cs->next = 0; - *ptr = cs; -} - +/* A helper function for validate which sets the target wide big- and + little-endian character set names, if possible. */ -static struct charset * -lookup_charset (const char *name) +static void +set_be_le_names (void) { - struct charset *cs; + int i, len; - for (cs = all_charsets; cs; cs = cs->next) - if (! strcmp (name, cs->name)) - return cs; + target_wide_charset_le_name = NULL; + target_wide_charset_be_name = NULL; - return NULL; + len = strlen (target_wide_charset_name); + for (i = 0; charset_enum[i]; ++i) + { + if (strncmp (target_wide_charset_name, charset_enum[i], len)) + continue; + if ((charset_enum[i][len] == 'B' + || charset_enum[i][len] == 'L') + && charset_enum[i][len + 1] == 'E' + && charset_enum[i][len + 2] == '\0') + { + if (charset_enum[i][len] == 'B') + target_wide_charset_be_name = charset_enum[i]; + else + target_wide_charset_le_name = charset_enum[i]; + } + } } - -/* The global list of translations. */ -static struct translation *all_translations; - +/* 'Set charset', 'set host-charset', 'set target-charset', 'set + target-wide-charset', 'set charset' sfunc's. */ static void -register_translation (struct translation *t) -{ - t->next = all_translations; - all_translations = t; -} - - -static struct translation * -lookup_translation (const char *from, const char *to) +validate (void) { - struct translation *t; + iconv_t desc; + const char *host_cset = host_charset (); - for (t = all_translations; t; t = t->next) - if (! strcmp (from, t->from) - && ! 
strcmp (to, t->to)) - return t; - - return 0; -} + desc = iconv_open (target_wide_charset_name, host_cset); + if (desc == (iconv_t) -1) + error ("Cannot convert between character sets `%s' and `%s'", + target_wide_charset_name, host_cset); + iconv_close (desc); + desc = iconv_open (target_charset_name, host_cset); + if (desc == (iconv_t) -1) + error ("Cannot convert between character sets `%s' and `%s'", + target_charset_name, host_cset); + iconv_close (desc); - -/* Constructing charsets. */ - -/* Allocate, initialize and return a straightforward charset. - Use this function, rather than creating the structures yourself, - so that we can add new fields to the structure in the future without - having to tweak all the old charset descriptions. */ -static struct charset * -simple_charset (const char *name, - int valid_host_charset, - int (*host_char_print_literally) (void *baton, int host_char), - void *host_char_print_literally_baton, - int (*target_char_to_control_char) (void *baton, - int target_char, - int *target_ctrl_char), - void *target_char_to_control_char_baton) -{ - struct charset *cs = xmalloc (sizeof (*cs)); - - memset (cs, 0, sizeof (*cs)); - cs->name = name; - cs->valid_host_charset = valid_host_charset; - cs->host_char_print_literally = host_char_print_literally; - cs->host_char_print_literally_baton = host_char_print_literally_baton; - cs->target_char_to_control_char = target_char_to_control_char; - cs->target_char_to_control_char_baton = target_char_to_control_char_baton; - - return cs; + set_be_le_names (); } - - -/* ASCII functions. */ - -static int -ascii_print_literally (void *baton, int c) +/* This is the sfunc for the 'set charset' command. */ +static void +set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c) { - c &= 0xff; - - return (0x20 <= c && c <= 0x7e); + /* CAREFUL: set the target charset here as well. 
*/ + target_charset_name = host_charset_name; + validate (); } - -static int -ascii_to_control (void *baton, int c, int *ctrl_char) +/* 'set host-charset' command sfunc. We need a wrapper here because + the function needs to have a specific signature. */ +static void +set_host_charset_sfunc (char *charset, int from_tty, + struct cmd_list_element *c) { - *ctrl_char = (c & 037); - return 1; + validate (); } - -/* ISO-8859 family functions. */ - - -static int -iso_8859_print_literally (void *baton, int c) +/* Wrapper for the 'set target-charset' command. */ +static void +set_target_charset_sfunc (char *charset, int from_tty, + struct cmd_list_element *c) { - c &= 0xff; - - return ((0x20 <= c && c <= 0x7e) /* ascii printables */ - || (! sevenbit_strings && 0xA0 <= c)); /* iso 8859 printables */ + validate (); } - -static int -iso_8859_to_control (void *baton, int c, int *ctrl_char) +/* Wrapper for the 'set target-wide-charset' command. */ +static void +set_target_wide_charset_sfunc (char *charset, int from_tty, + struct cmd_list_element *c) { - *ctrl_char = (c & 0200) | (c & 037); - return 1; + validate (); } - -/* Construct an ISO-8859-like character set. */ -static struct charset * -iso_8859_family_charset (const char *name) +/* sfunc for the 'show charset' command. */ +static void +show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c, + const char *name) { - return simple_charset (name, 1, - iso_8859_print_literally, 0, - iso_8859_to_control, 0); + show_host_charset_name (file, from_tty, c, host_charset_name); + show_target_charset_name (file, from_tty, c, target_charset_name); + show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name); } - -/* EBCDIC family functions. */ - - -static int -ebcdic_print_literally (void *baton, int c) -{ - c &= 0xff; - - return (64 <= c && c <= 254); -} - +/* Accessor functions. 
*/ -static int -ebcdic_to_control (void *baton, int c, int *ctrl_char) +const char * +host_charset (void) { - /* There are no control character equivalents in EBCDIC. Use - numeric escapes. */ - return 0; + if (!strcmp (host_charset_name, "auto")) + return auto_host_charset_name; + return host_charset_name; } - -/* Construct an EBCDIC-like character set. */ -static struct charset * -ebcdic_family_charset (const char *name) +const char * +target_charset (void) { - return simple_charset (name, 0, - ebcdic_print_literally, 0, - ebcdic_to_control, 0); + return target_charset_name; } - - - - - -/* Fallback functions using iconv. */ - -#if defined(HAVE_ICONV) - -struct cached_iconv { - struct charset *from, *to; - iconv_t i; -}; - -/* Make sure the iconv cache *CI contains an iconv descriptor - translating from FROM to TO. If it already does, fine; otherwise, - close any existing descriptor, and open up a new one. On success, - return zero; on failure, return -1 and set errno. */ -static int -check_iconv_cache (struct cached_iconv *ci, - struct charset *from, - struct charset *to) +const char * +target_wide_charset (void) { - iconv_t i; - - /* Does the cached iconv descriptor match the conversion we're trying - to do now? */ - if (ci->from == from - && ci->to == to - && ci->i != (iconv_t) 0) - return 0; - - /* It doesn't. If we actually had any iconv descriptor open at - all, close it now. */ - if (ci->i != (iconv_t) 0) + if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG) { - i = ci->i; - ci->i = (iconv_t) 0; - - if (iconv_close (i) == -1) - error (_("Error closing `iconv' descriptor for " - "`%s'-to-`%s' character conversion: %s"), - ci->from->name, ci->to->name, safe_strerror (errno)); + if (target_wide_charset_be_name) + return target_wide_charset_be_name; } - - /* Open a new iconv descriptor for the required conversion. 
*/ - i = iconv_open (to->name, from->name); - if (i == (iconv_t) -1) - return -1; - - ci->i = i; - ci->from = from; - ci->to = to; - - return 0; -} - - -/* Convert FROM_CHAR using the cached iconv conversion *CI. Return - non-zero if the conversion was successful, zero otherwise. */ -static int -cached_iconv_convert (struct cached_iconv *ci, int from_char, int *to_char) -{ - char from; - ICONV_CONST char *from_ptr = &from; - char to, *to_ptr = &to; - size_t from_left = sizeof (from), to_left = sizeof (to); - - gdb_assert (ci->i != (iconv_t) 0); - - from = from_char; - if (iconv (ci->i, &from_ptr, &from_left, &to_ptr, &to_left) - == (size_t) -1) + else { - /* These all suggest that the input or output character sets - have multi-byte encodings of some characters, which means - it's unsuitable for use as a GDB character set. We should - never have selected it. */ - gdb_assert (errno != E2BIG && errno != EINVAL); - - /* This suggests a bug in the code managing *CI. */ - gdb_assert (errno != EBADF); - - /* This seems to mean that there is no equivalent character in - the `to' character set. */ - if (errno == EILSEQ) - return 0; - - /* Anything else is mysterious. */ - internal_error (__FILE__, __LINE__, - _("Error converting character `%d' from `%s' to `%s' " - "character set: %s"), - from_char, ci->from->name, ci->to->name, - safe_strerror (errno)); + if (target_wide_charset_le_name) + return target_wide_charset_le_name; } - /* If the pointers weren't advanced across the input, that also - suggests something was wrong. */ - gdb_assert (from_left == 0 && to_left == 0); - - *to_char = (unsigned char) to; - return 1; + return target_wide_charset_name; } - -static void -register_iconv_charsets (void) -{ - /* Here we should check whether various character sets were - recognized by the local iconv implementation. 
- - The first implementation registered a bunch of character sets - recognized by iconv, but then we discovered that iconv on Solaris - and iconv on GNU/Linux had no character sets in common. So we - replaced them with the hard-coded tables that appear later in the - file. */ -} - -#endif /* defined (HAVE_ICONV) */ - -/* Fallback routines for systems without iconv. */ +/* Host character set management. For the time being, we assume that + the host character set is some superset of ASCII. */ -#if ! defined (HAVE_ICONV) -struct cached_iconv { char nothing; }; - -static int -check_iconv_cache (struct cached_iconv *ci, - struct charset *from, - struct charset *to) -{ - errno = EINVAL; - return -1; -} - -static int -cached_iconv_convert (struct cached_iconv *ci, int from_char, int *to_char) -{ - /* This function should never be called. */ - gdb_assert (0); -} - -static void -register_iconv_charsets (void) +char +host_letter_to_control_character (char c) { + if (c == '?') + return 0177; + return c & 0237; } -#endif /* ! defined(HAVE_ICONV) */ +/* Convert a host character, C, to its hex value. C must already have + been validated using isxdigit. */ - -/* Default trivial conversion functions. */ - -static int -identity_either_char_to_other (void *baton, int either_char, int *other_char) +int +host_hex_value (char c) { - *other_char = either_char; - return 1; + if (isdigit (c)) + return c - '0'; + if (c >= 'a' && c <= 'f') + return 10 + c - 'a'; + gdb_assert (c >= 'A' && c <= 'F'); + return 10 + c - 'A'; } - -/* Default non-trivial conversion functions. */ - - -static char backslashable[] = "abfnrtv"; -static char *backslashed[] = {"a", "b", "f", "n", "r", "t", "v", "0"}; -static char represented[] = "\a\b\f\n\r\t\v"; - - -/* Translate TARGET_CHAR into the host character set, and see if it - matches any of our standard escape sequences. 
*/ -static const char * -default_c_target_char_has_backslash_escape (void *baton, int target_char) -{ - int host_char; - const char *ix; - - /* If target_char has no equivalent in the host character set, - assume it doesn't have a backslashed form. */ - if (! target_char_to_host (target_char, &host_char)) - return NULL; - - ix = strchr (represented, host_char); - if (ix) - return backslashed[ix - represented]; - else - return NULL; -} - - -/* Translate the backslash the way we would in the host character set, - and then try to translate that into the target character set. */ -static int -default_c_parse_backslash (void *baton, int host_char, int *target_char) -{ - const char *ix; - - ix = strchr (backslashable, host_char); - - if (! ix) - return 0; - else - return host_char_to_target (represented[ix - backslashable], - target_char); -} +/* Public character management functions. */ +/* A cleanup function which is run to close an iconv descriptor. */ -/* Convert using a cached iconv descriptor. */ -static int -iconv_convert (void *baton, int from_char, int *to_char) +static void +cleanup_iconv (void *p) { - struct cached_iconv *ci = baton; - return cached_iconv_convert (ci, from_char, to_char); + iconv_t *descp = p; + iconv_close (*descp); } +void +convert_between_encodings (const char *from, const char *to, + const gdb_byte *bytes, unsigned int num_bytes, + int width, struct obstack *output, + enum transliterations translit) +{ + iconv_t desc; + struct cleanup *cleanups; + size_t inleft; + char *inp; + unsigned int space_request; + + /* Often, the host and target charsets will be the same. */ + if (!strcmp (from, to)) + { + obstack_grow (output, bytes, num_bytes); + return; + } - -/* Conversion tables. */ - - -/* I'd much rather fall back on iconv whenever possible. But the - character set names you use with iconv aren't standardized at all, - a lot of platforms have really meager character set coverage, etc. 
- I wanted to have at least something we could use to exercise the - test suite on all platforms. - - In the long run, we should have a configure-time process explore - somehow which character sets the host platform supports, and some - arrangement that allows GDB users to use platform-indepedent names - for character sets. */ - - -/* We generated these tables using iconv on a GNU/Linux machine. */ - - -static int ascii_to_iso_8859_1_table[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */ - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */ - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 80 */ - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 96 */ - 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 112 */ - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 128 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int ascii_to_ebcdic_us_table[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */ - 64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */ - 240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */ - 
124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */ - 215,216,217,226,227,228,229,230,231,232,233, -1,224, -1, -1,109, /* 96 */ - 121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */ - 151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161, 7, /* 128 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int ascii_to_ibm1047_table[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */ - 64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */ - 240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */ - 124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */ - 215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109, /* 96 */ - 121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */ - 151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161, 7, /* 128 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, /* 224 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int iso_8859_1_to_ascii_table[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */ - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */ - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 80 */ - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 96 */ - 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 112 */ - 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 128 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int iso_8859_1_to_ebcdic_us_table[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */ - 64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */ - 240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */ - 124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */ - 215,216,217,226,227,228,229,230,231,232,233, -1,224, -1, -1,109, /* 96 */ - 121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 
*/ - 151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161, 7, /* 128 */ - 32, 33, 34, 35, 36, 21, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, /* 144 */ - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62,255, /* 160 */ - -1, -1, 74, -1, -1, -1,106, -1, -1, -1, -1, -1, 95, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int iso_8859_1_to_ibm1047_table[] = { - 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */ - 64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */ - 240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */ - 124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */ - 215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109, /* 96 */ - 121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */ - 151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161, 7, /* 128 */ - 32, 33, 34, 35, 36, 21, 6, 23, 40, 41, 42, 43, 44, 9, 10, 27, /* 144 */ - 48, 49, 26, 51, 52, 53, 54, 8, 56, 57, 58, 59, 4, 20, 62,255, /* 160 */ - 65,170, 74,177,159,178,106,181,187,180,154,138,176,202,175,188, /* 176 */ - 144,143,234,250,190,160,182,179,157,218,155,139,183,184,185,171, /* 192 */ - 100,101, 98,102, 99,103,158,104,116,113,114,115,120,117,118,119, /* 208 */ - 172,105,237,238,235,239,236,191,128,253,254,251,252,186,174, 89, /* 224 */ - 68, 69, 66, 70, 67, 71,156, 72, 84, 81, 82, 83, 88, 85, 86, 87, /* 240 */ - 140, 73,205,206,203,207,204,225,112,221,222,219,220,141,142,223 /* 256 */ -}; - - -static int ebcdic_us_to_ascii_table[] = { - 0, 
1, 2, 3, -1, 9, -1,127, -1, -1, -1, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, -1, -1, 8, -1, 24, 25, -1, -1, 28, 29, 30, 31, /* 32 */ - -1, -1, -1, -1, -1, 10, 23, 27, -1, -1, -1, -1, -1, 5, 6, 7, /* 48 */ - -1, -1, 22, -1, -1, -1, -1, 4, -1, -1, -1, -1, 20, 21, -1, 26, /* 64 */ - 32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46, 60, 40, 43,124, /* 80 */ - 38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59, -1, /* 96 */ - 45, 47, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, 37, 95, 62, 63, /* 112 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */ - -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */ - -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */ - -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */ - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */ - 92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int ebcdic_us_to_iso_8859_1_table[] = { - 0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19,157,133, 8,135, 24, 25,146,143, 28, 29, 30, 31, /* 32 */ - 128,129,130,131,132, 10, 23, 27,136,137,138,139,140, 5, 6, 7, /* 48 */ - 144,145, 22,147,148,149,150, 4,152,153,154,155, 20, 21,158, 26, /* 64 */ - 32, -1, -1, -1, -1, -1, -1, -1, -1, -1,162, 46, 60, 40, 43,124, /* 80 */ - 38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59,172, /* 96 */ - 45, 47, -1, -1, -1, -1, -1, -1, -1, -1,166, 44, 37, 95, 62, 63, /* 112 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */ - -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */ - -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */ - 
-1,126,115,116,117,118,119,120,121,122, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */ - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */ - 92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1,159 /* 256 */ -}; - - -static int ebcdic_us_to_ibm1047_table[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */ - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */ - 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 74, 75, 76, 77, 78, 79, /* 80 */ - 80, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90, 91, 92, 93, 94,176, /* 96 */ - 96, 97, -1, -1, -1, -1, -1, -1, -1, -1,106,107,108,109,110,111, /* 112 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1,121,122,123,124,125,126,127, /* 128 */ - -1,129,130,131,132,133,134,135,136,137, -1, -1, -1, -1, -1, -1, /* 144 */ - -1,145,146,147,148,149,150,151,152,153, -1, -1, -1, -1, -1, -1, /* 160 */ - -1,161,162,163,164,165,166,167,168,169, -1, -1, -1, -1, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - 192,193,194,195,196,197,198,199,200,201, -1, -1, -1, -1, -1, -1, /* 208 */ - 208,209,210,211,212,213,214,215,216,217, -1, -1, -1, -1, -1, -1, /* 224 */ - 224, -1,226,227,228,229,230,231,232,233, -1, -1, -1, -1, -1, -1, /* 240 */ - 240,241,242,243,244,245,246,247,248,249, -1, -1, -1, -1, -1,255 /* 256 */ -}; - - -static int ibm1047_to_ascii_table[] = { - 0, 1, 2, 3, -1, 9, -1,127, -1, -1, -1, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, -1, -1, 8, -1, 24, 25, -1, -1, 28, 29, 30, 31, /* 32 */ - -1, -1, -1, -1, -1, 10, 23, 27, -1, -1, -1, -1, -1, 5, 6, 7, /* 48 */ - -1, -1, 22, -1, -1, 
-1, -1, 4, -1, -1, -1, -1, 20, 21, -1, 26, /* 64 */ - 32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46, 60, 40, 43,124, /* 80 */ - 38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59, 94, /* 96 */ - 45, 47, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, 37, 95, 62, 63, /* 112 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */ - -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */ - -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */ - -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, 91, -1, -1, /* 176 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 93, -1, -1, /* 192 */ - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */ - 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */ - 92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1, -1 /* 256 */ -}; - - -static int ibm1047_to_iso_8859_1_table[] = { - 0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19,157,133, 8,135, 24, 25,146,143, 28, 29, 30, 31, /* 32 */ - 128,129,130,131,132, 10, 23, 27,136,137,138,139,140, 5, 6, 7, /* 48 */ - 144,145, 22,147,148,149,150, 4,152,153,154,155, 20, 21,158, 26, /* 64 */ - 32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124, /* 80 */ - 38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94, /* 96 */ - 45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63, /* 112 */ - 248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34, /* 128 */ - 216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177, /* 144 */ - 176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164, /* 160 */ - 181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174, /* 176 */ - 172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215, /* 192 */ - 123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245, /* 208 */ - 
125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255, /* 224 */ - 92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213, /* 240 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159 /* 256 */ -}; - - -static int ibm1047_to_ebcdic_us_table[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */ - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */ - 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 74, 75, 76, 77, 78, 79, /* 80 */ - 80, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90, 91, 92, 93, 94, -1, /* 96 */ - 96, 97, -1, -1, -1, -1, -1, -1, -1, -1,106,107,108,109,110,111, /* 112 */ - -1, -1, -1, -1, -1, -1, -1, -1, -1,121,122,123,124,125,126,127, /* 128 */ - -1,129,130,131,132,133,134,135,136,137, -1, -1, -1, -1, -1, -1, /* 144 */ - -1,145,146,147,148,149,150,151,152,153, -1, -1, -1, -1, -1, -1, /* 160 */ - -1,161,162,163,164,165,166,167,168,169, -1, -1, -1, -1, -1, -1, /* 176 */ - 95, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */ - 192,193,194,195,196,197,198,199,200,201, -1, -1, -1, -1, -1, -1, /* 208 */ - 208,209,210,211,212,213,214,215,216,217, -1, -1, -1, -1, -1, -1, /* 224 */ - 224, -1,226,227,228,229,230,231,232,233, -1, -1, -1, -1, -1, -1, /* 240 */ - 240,241,242,243,244,245,246,247,248,249, -1, -1, -1, -1, -1,255 /* 256 */ -}; + desc = iconv_open (to, from); + if (desc == (iconv_t) -1) + perror_with_name ("Converting character sets"); + cleanups = make_cleanup (cleanup_iconv, &desc); + inleft = num_bytes; + inp = (char *) bytes; -static int -table_convert_char (void *baton, int from, int *to) -{ - int *table = (int *) baton; + space_request = num_bytes; - if (0 <= from && from <= 255 - && table[from] != -1) + while (inleft > 0) { - *to = table[from]; - return 1; + char *outp; + size_t outleft, r; + int old_size; + + old_size = 
obstack_object_size (output); + obstack_blank (output, space_request); + + outp = obstack_base (output) + old_size; + outleft = space_request; + + r = iconv (desc, &inp, &inleft, &outp, &outleft); + + /* Now make sure that the object on the obstack only includes + bytes we have converted. */ + obstack_blank (output, - (int) outleft); + + if (r == (size_t) -1) + { + switch (errno) + { + case EILSEQ: + { + int i; + + /* Invalid input sequence. */ + if (translit == translit_none) + error (_("Could not convert character to `%s' character set"), + to); + + /* We emit escape sequence for the bytes, skip them, + and try again. */ + for (i = 0; i < width; ++i) + { + char octal[5]; + + sprintf (octal, "\\%.3o", *inp & 0xff); + obstack_grow_str (output, octal); + + ++inp; + --inleft; + } + } + break; + + case E2BIG: + /* We ran out of space in the output buffer. Make it + bigger next time around. */ + space_request *= 2; + break; + + case EINVAL: + /* Incomplete input sequence. FIXME: ought to report this + to the caller somehow. 
*/ + inleft = 0; + break; + + default: + perror_with_name ("Internal error while converting character sets"); + } + } } - else - return 0; -} - -static struct translation * -table_translation (const char *from, const char *to, int *table, - const char *(*c_target_char_has_backslash_escape) - (void *baton, int target_char), - void *c_target_char_has_backslash_escape_baton, - int (*c_parse_backslash) (void *baton, - int host_char, - int *target_char), - void *c_parse_backslash_baton) -{ - struct translation *t = xmalloc (sizeof (*t)); - - memset (t, 0, sizeof (*t)); - t->from = from; - t->to = to; - t->c_target_char_has_backslash_escape = c_target_char_has_backslash_escape; - t->c_target_char_has_backslash_escape_baton - = c_target_char_has_backslash_escape_baton; - t->c_parse_backslash = c_parse_backslash; - t->c_parse_backslash_baton = c_parse_backslash_baton; - t->convert_char = table_convert_char; - t->convert_char_baton = (void *) table; - - return t; + do_cleanups (cleanups); } - -static struct translation * -simple_table_translation (const char *from, const char *to, int *table) -{ - return table_translation (from, to, table, 0, 0, 0, 0); -} - - - -/* Setting and retrieving the host and target charsets. */ - - -/* The current host and target character sets. */ -static struct charset *current_host_charset, *current_target_charset; - -/* The current functions and batons we should use for the functions in - charset.h. 
*/ - -static const char *(*c_target_char_has_backslash_escape_func) - (void *baton, int target_char); -static void *c_target_char_has_backslash_escape_baton; - -static int (*c_parse_backslash_func) (void *baton, - int host_char, - int *target_char); -static void *c_parse_backslash_baton; - -static int (*host_char_to_target_func) (void *baton, - int host_char, - int *target_char); -static void *host_char_to_target_baton; - -static int (*target_char_to_host_func) (void *baton, - int target_char, - int *host_char); -static void *target_char_to_host_baton; - - -/* Cached iconv conversions, that might be useful to fallback - routines. */ -static struct cached_iconv cached_iconv_host_to_target; -static struct cached_iconv cached_iconv_target_to_host; - -/* Charset structures manipulation functions. */ -static struct charset * -lookup_charset_or_error (const char *name) +/* An iterator that returns host wchar_t's from a target string. */ +struct wchar_iterator { - struct charset *cs = lookup_charset (name); + /* The underlying iconv descriptor. */ + iconv_t desc; - if (! cs) - error (_("GDB doesn't know of any character set named `%s'."), name); + /* The input string. This is updated as convert characters. */ + char *input; + /* The number of bytes remaining in the input. */ + size_t bytes; - return cs; -} + /* The width of an input character. */ + size_t width; -static void -check_valid_host_charset (struct charset *cs) -{ - if (! cs->valid_host_charset) - error (_("GDB can't use `%s' as its host character set."), cs->name); -} + /* The output buffer and its size. */ + gdb_wchar_t *out; + size_t out_size; +}; -/* Set the host and target character sets to HOST and TARGET. */ -static void -set_host_and_target_charsets (struct charset *host, struct charset *target) +/* Create a new iterator. 
*/ +struct wchar_iterator * +make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, + size_t width) { - struct translation *h2t, *t2h; - - /* If they're not both initialized yet, then just do nothing for - now. As soon as we're done running our initialize function, - everything will be initialized. */ - if (! host || ! target) - { - current_host_charset = host; - current_target_charset = target; - return; - } - - h2t = lookup_translation (host->name, target->name); - t2h = lookup_translation (target->name, host->name); - - /* If the translations don't provide conversion functions, make sure - iconv can back them up. Do this *before* modifying any state. */ - if (host != target) - { - if (! h2t || ! h2t->convert_char) - { - if (check_iconv_cache (&cached_iconv_host_to_target, host, target) - < 0) - error (_("GDB can't convert from the `%s' character set to `%s'."), - host->name, target->name); - } - if (! t2h || ! t2h->convert_char) - { - if (check_iconv_cache (&cached_iconv_target_to_host, target, host) - < 0) - error (_("GDB can't convert from the `%s' character set to `%s'."), - target->name, host->name); - } - } - - if (t2h && t2h->c_target_char_has_backslash_escape) - { - c_target_char_has_backslash_escape_func - = t2h->c_target_char_has_backslash_escape; - c_target_char_has_backslash_escape_baton - = t2h->c_target_char_has_backslash_escape_baton; - } - else - c_target_char_has_backslash_escape_func - = default_c_target_char_has_backslash_escape; - - if (h2t && h2t->c_parse_backslash) - { - c_parse_backslash_func = h2t->c_parse_backslash; - c_parse_backslash_baton = h2t->c_parse_backslash_baton; - } - else - c_parse_backslash_func = default_c_parse_backslash; - - if (h2t && h2t->convert_char) - { - host_char_to_target_func = h2t->convert_char; - host_char_to_target_baton = h2t->convert_char_baton; - } - else if (host == target) - host_char_to_target_func = identity_either_char_to_other; - else - { - host_char_to_target_func = 
iconv_convert; - host_char_to_target_baton = &cached_iconv_host_to_target; - } + struct wchar_iterator *result; + iconv_t desc; - if (t2h && t2h->convert_char) - { - target_char_to_host_func = t2h->convert_char; - target_char_to_host_baton = t2h->convert_char_baton; - } - else if (host == target) - target_char_to_host_func = identity_either_char_to_other; - else - { - target_char_to_host_func = iconv_convert; - target_char_to_host_baton = &cached_iconv_target_to_host; - } + desc = iconv_open ("wchar_t", charset); + if (desc == (iconv_t) -1) + perror_with_name ("Converting character sets"); - current_host_charset = host; - current_target_charset = target; -} + result = XNEW (struct wchar_iterator); + result->desc = desc; + result->input = (char *) input; + result->bytes = bytes; + result->width = width; -/* Do the real work of setting the host charset. */ -static void -set_host_charset (const char *charset) -{ - struct charset *cs = lookup_charset_or_error (charset); - check_valid_host_charset (cs); - set_host_and_target_charsets (cs, current_target_charset); -} + result->out = XNEW (gdb_wchar_t); + result->out_size = 1; -/* Do the real work of setting the target charset. */ -static void -set_target_charset (const char *charset) -{ - struct charset *cs = lookup_charset_or_error (charset); - - set_host_and_target_charsets (current_host_charset, cs); + return result; } - -/* 'Set charset', 'set host-charset', 'set target-charset', 'show - charset' sfunc's. */ - -/* This is the sfunc for the 'set charset' command. */ static void -set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c) +do_cleanup_iterator (void *p) { - struct charset *cs = lookup_charset_or_error (host_charset_name); - check_valid_host_charset (cs); - /* CAREFUL: set the target charset here as well. */ - target_charset_name = host_charset_name; - set_host_and_target_charsets (cs, cs); -} + struct wchar_iterator *iter = p; -/* 'set host-charset' command sfunc. 
We need a wrapper here because - the function needs to have a specific signature. */ -static void -set_host_charset_sfunc (char *charset, int from_tty, - struct cmd_list_element *c) -{ - set_host_charset (host_charset_name); + iconv_close (iter->desc); + xfree (iter->out); + xfree (iter); } -/* Wrapper for the 'set target-charset' command. */ -static void -set_target_charset_sfunc (char *charset, int from_tty, - struct cmd_list_element *c) +struct cleanup * +make_cleanup_wchar_iterator (struct wchar_iterator *iter) { - set_target_charset (target_charset_name); + return make_cleanup (do_cleanup_iterator, iter); } -/* sfunc for the 'show charset' command. */ -static void -show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c, - const char *name) -{ - if (current_host_charset == current_target_charset) - fprintf_filtered (file, - _("The current host and target character set is `%s'.\n"), - host_charset ()); - else +int +wchar_iterate (struct wchar_iterator *iter, + enum wchar_iterate_result *out_result, + gdb_wchar_t **out_chars, + const gdb_byte **ptr, + size_t *len) +{ + size_t out_request; + + /* Try to convert some characters. At first we try to convert just + a single character. The reason for this is that iconv does not + necessarily update its outgoing arguments when it encounters an + invalid input sequence -- but we want to reliably report this to + our caller so it can emit an escape sequence. 
*/ + out_request = 1; + while (iter->bytes > 0) { - fprintf_filtered (file, _("The current host character set is `%s'.\n"), - host_charset ()); - fprintf_filtered (file, _("The current target character set is `%s'.\n"), - target_charset ()); + char *outptr = (char *) &iter->out[0]; + char *orig_inptr = iter->input; + size_t orig_in = iter->bytes; + size_t out_avail = out_request * sizeof (gdb_wchar_t); + size_t num; + gdb_wchar_t result; + + size_t r = iconv (iter->desc, (char **) &iter->input, &iter->bytes, + &outptr, &out_avail); + if (r == (size_t) -1) + { + switch (errno) + { + case EILSEQ: + /* Invalid input sequence. Skip it, and let the caller + know about it. */ + *out_result = wchar_iterate_invalid; + *ptr = iter->input; + *len = iter->width; + iter->input += iter->width; + iter->bytes -= iter->width; + return 0; + + case E2BIG: + /* We ran out of space. We still might have converted a + character; if so, return it. Otherwise, grow the + buffer and try again. */ + if (out_avail < out_request * sizeof (gdb_wchar_t)) + break; + + ++out_request; + if (out_request > iter->out_size) + { + iter->out_size = out_request; + iter->out = xrealloc (iter->out, + out_request * sizeof (gdb_wchar_t)); + } + continue; + + case EINVAL: + /* Incomplete input sequence. Let the caller know, and + arrange for future calls to see EOF. */ + *out_result = wchar_iterate_incomplete; + *ptr = iter->input; + *len = iter->bytes; + iter->bytes = 0; + return 0; + + default: + perror_with_name ("Internal error while converting character sets"); + } + } + + /* We converted something. */ + num = out_request - out_avail / sizeof (gdb_wchar_t); + *out_result = wchar_iterate_ok; + *out_chars = iter->out; + *ptr = orig_inptr; + *len = orig_in - iter->bytes; + return num; } + + /* Really done. */ + *out_result = wchar_iterate_eof; + return -1; } -/* Accessor functions. 
*/ - -const char * -host_charset (void) -{ - return current_host_charset->name; -} +/* The charset.c module initialization function. */ -const char * -target_charset (void) -{ - return current_target_charset->name; -} +extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ +typedef char *char_ptr; +DEF_VEC_P (char_ptr); - -/* Public character management functions. */ +static VEC (char_ptr) *charsets; +#ifdef PHONY_ICONV -const char * -c_target_char_has_backslash_escape (int target_char) +static void +find_charset_names (void) { - return ((*c_target_char_has_backslash_escape_func) - (c_target_char_has_backslash_escape_baton, target_char)); + VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); + VEC_safe_push (char_ptr, charsets, NULL); } +#else /* PHONY_ICONV */ +#ifdef HAVE_ICONVLIST -int -c_parse_backslash (int host_char, int *target_char) +/* A helper function that adds some character sets to the vector of + all character sets. This is a callback function for iconvlist. 
*/ + +static int +add_one (unsigned int count, const char *const *names, void *data) { - return (*c_parse_backslash_func) (c_parse_backslash_baton, - host_char, target_char); -} + unsigned int i; + for (i = 0; i < count; ++i) + VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); -int -host_char_print_literally (int host_char) -{ - return ((*current_host_charset->host_char_print_literally) - (current_host_charset->host_char_print_literally_baton, - host_char)); + return 0; } - -int -target_char_to_control_char (int target_char, int *target_ctrl_char) +static void +find_charset_names (void) { - return ((*current_target_charset->target_char_to_control_char) - (current_target_charset->target_char_to_control_char_baton, - target_char, target_ctrl_char)); + iconvlist (add_one, NULL); + VEC_safe_push (char_ptr, charsets, NULL); } +#else -int -host_char_to_target (int host_char, int *target_char) +static void +find_charset_names (void) { - return ((*host_char_to_target_func) - (host_char_to_target_baton, host_char, target_char)); -} + FILE *in; + in = popen ("iconv -l", "r"); + /* It is ok to ignore errors; we'll fall back on a default. */ + if (!in) + return; -int -target_char_to_host (int target_char, int *host_char) -{ - return ((*target_char_to_host_func) - (target_char_to_host_baton, target_char, host_char)); -} + /* POSIX says that iconv -l uses an unspecified format. We parse + the glibc format; feel free to add others as needed. */ + while (!feof (in)) + { + /* The size of buf is chosen arbitrarily. A character set name + longer than this would not be very nice. */ + char buf[80]; + int len; + char *r = fgets (buf, sizeof (buf), in); + if (!r) + break; + len = strlen (r); + if (len <= 3) + continue; + if (buf[len - 2] == '/' && buf[len - 3] == '/') + buf[len - 3] = '\0'; + VEC_safe_push (char_ptr, charsets, xstrdup (buf)); + } + pclose (in); - -/* The charset.c module initialization function. 
*/ + VEC_safe_push (char_ptr, charsets, NULL); +} -extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ +#endif /* HAVE_ICONVLIST */ +#endif /* PHONY_ICONV */ void _initialize_charset (void) { struct cmd_list_element *new_cmd; - /* Register all the character set GDB knows about. - - You should use the same names that iconv does, where possible, to - take advantage of the iconv-based default behaviors. - - CAUTION: if you register a character set, you must also register - as many translations as are necessary to make that character set - interoperate correctly with all the other character sets. We do - provide default behaviors when no translation is available, or - when a translation's function pointer for a particular operation - is zero. Hopefully, these defaults will be correct often enough - that we won't need to provide too many translations. */ - register_charset (simple_charset ("ASCII", 1, - ascii_print_literally, 0, - ascii_to_control, 0)); - register_charset (iso_8859_family_charset ("ISO-8859-1")); - register_charset (ebcdic_family_charset ("EBCDIC-US")); - register_charset (ebcdic_family_charset ("IBM1047")); - register_iconv_charsets (); - - { - struct { char *from; char *to; int *table; } tlist[] = { - { "ASCII", "ISO-8859-1", ascii_to_iso_8859_1_table }, - { "ASCII", "EBCDIC-US", ascii_to_ebcdic_us_table }, - { "ASCII", "IBM1047", ascii_to_ibm1047_table }, - { "ISO-8859-1", "ASCII", iso_8859_1_to_ascii_table }, - { "ISO-8859-1", "EBCDIC-US", iso_8859_1_to_ebcdic_us_table }, - { "ISO-8859-1", "IBM1047", iso_8859_1_to_ibm1047_table }, - { "EBCDIC-US", "ASCII", ebcdic_us_to_ascii_table }, - { "EBCDIC-US", "ISO-8859-1", ebcdic_us_to_iso_8859_1_table }, - { "EBCDIC-US", "IBM1047", ebcdic_us_to_ibm1047_table }, - { "IBM1047", "ASCII", ibm1047_to_ascii_table }, - { "IBM1047", "ISO-8859-1", ibm1047_to_iso_8859_1_table }, - { "IBM1047", "EBCDIC-US", ibm1047_to_ebcdic_us_table } - }; - - int i; - - for (i = 0; i < (sizeof (tlist) / 
sizeof (tlist[0])); i++) - register_translation (simple_table_translation (tlist[i].from, - tlist[i].to, - tlist[i].table)); - } - - set_host_charset (host_charset_name); - set_target_charset (target_charset_name); + /* The first element is always "auto"; then we skip it for the + commands where it is not allowed. */ + VEC_safe_push (char_ptr, charsets, "auto"); + find_charset_names (); + + if (VEC_length (char_ptr, charsets) > 1) + charset_enum = (const char **) VEC_address (char_ptr, charsets); + else + charset_enum = default_charset_names; + +#ifndef PHONY_ICONV +#ifdef HAVE_LANGINFO_CODESET + auto_host_charset_name = nl_langinfo (CODESET); + target_charset_name = auto_host_charset_name; + + set_be_le_names (); +#endif +#endif add_setshow_enum_cmd ("charset", class_support, - host_charset_enum, &host_charset_name, _("\ + &charset_enum[1], &host_charset_name, _("\ Set the host and target character sets."), _("\ Show the host and target character sets."), _("\ The `host character set' is the one used by the system GDB is running on.\n\ @@ -1249,7 +763,7 @@ To see a list of the character sets GDB supports, type `set charset <TAB>'."), &setlist, &showlist); add_setshow_enum_cmd ("host-charset", class_support, - host_charset_enum, &host_charset_name, _("\ + charset_enum, &host_charset_name, _("\ Set the host character set."), _("\ Show the host character set."), _("\ The `host character set' is the one used by the system GDB is running on.\n\ @@ -1261,7 +775,7 @@ To see a list of the character sets GDB supports, type `set host-charset <TAB>'. 
&setlist, &showlist); add_setshow_enum_cmd ("target-charset", class_support, - target_charset_enum, &target_charset_name, _("\ + &charset_enum[1], &target_charset_name, _("\ Set the target character set."), _("\ Show the target character set."), _("\ The `target character set' is the one used by the program being debugged.\n\ @@ -1271,4 +785,19 @@ To see a list of the character sets GDB supports, type `set target-charset'<TAB> set_target_charset_sfunc, show_target_charset_name, &setlist, &showlist); + + add_setshow_enum_cmd ("target-wide-charset", class_support, + &charset_enum[1], &target_wide_charset_name, + _("\ +Set the target wide character set."), _("\ +Show the target wide character set."), _("\ +The `target wide character set' is the one used by the program being debugged.\n\ +In particular it is the encoding used by `wchar_t'.\n\ +GDB translates characters and strings between the host and target\n\ +character sets as needed.\n\ +To see a list of the character sets GDB supports, type\n\ +`set target-wide-charset'<TAB>"), + set_target_wide_charset_sfunc, + show_target_wide_charset_name, + &setlist, &showlist); } diff --git a/gdb/charset.h b/gdb/charset.h index 21780b6..60abb18 100644 --- a/gdb/charset.h +++ b/gdb/charset.h @@ -19,89 +19,129 @@ #ifndef CHARSET_H #define CHARSET_H - /* If the target program uses a different character set than the host, GDB has some support for translating between the two; GDB converts characters and strings to the host character set before displaying them, and converts characters and strings appearing in expressions entered by the user to the target character set. - At the moment, GDB only supports single-byte, stateless character - sets. This includes the ISO-8859 family (ASCII extended with - accented characters, and (I think) Cyrillic, for European - languages), and the EBCDIC family (used on IBM's mainframes). 
- Unfortunately, it excludes many Asian scripts, the fixed- and - variable-width Unicode encodings, and other desireable things. - Patches are welcome! (For example, it would be nice if the Java - string support could simply get absorbed into some more general - multi-byte encoding support.) - - Furthermore, GDB's code pretty much assumes that the host character - set is some superset of ASCII; there are plenty if ('0' + n) - expressions and the like. - - When the `iconv' library routine supports a character set meeting - the requirements above, it's easy to plug an entry into GDB's table - that uses iconv to handle the details. */ + GDB's code pretty much assumes that the host character set is some + superset of ASCII; there are plenty if ('0' + n) expressions and + the like. */ /* Return the name of the current host/target character set. The result is owned by the charset module; the caller should not free it. */ const char *host_charset (void); const char *target_charset (void); - -/* In general, the set of C backslash escapes (\n, \f) is specific to - the character set. Not all character sets will have form feed - characters, for example. - - The following functions allow GDB to parse and print control - characters in a character-set-independent way. They are both - language-specific (to C and C++) and character-set-specific. - Putting them here is a compromise. */ - - -/* If the target character TARGET_CHAR have a backslash escape in the - C language (i.e., a character like 'n' or 't'), return the host - character string that should follow the backslash. Otherwise, - return zero. - - When this function returns non-zero, the string it returns is - statically allocated; the caller is not responsible for freeing it. 
*/ -const char *c_target_char_has_backslash_escape (int target_char); - - -/* If the host character HOST_CHAR is a valid backslash escape in the - C language for the target character set, return non-zero, and set - *TARGET_CHAR to the target character the backslash escape represents. - Otherwise, return zero. */ -int c_parse_backslash (int host_char, int *target_char); - - -/* Return non-zero if the host character HOST_CHAR can be printed - literally --- that is, if it can be readably printed as itself in a - character or string constant. Return zero if it should be printed - using some kind of numeric escape, like '\031' in C, '^(25)' in - Chill, or #25 in Pascal. */ -int host_char_print_literally (int host_char); - - -/* If the host character HOST_CHAR has an equivalent in the target - character set, set *TARGET_CHAR to that equivalent, and return - non-zero. Otherwise, return zero. */ -int host_char_to_target (int host_char, int *target_char); - - -/* If the target character TARGET_CHAR has an equivalent in the host - character set, set *HOST_CHAR to that equivalent, and return - non-zero. Otherwise, return zero. */ -int target_char_to_host (int target_char, int *host_char); - - -/* If the target character TARGET_CHAR has a corresponding control - character (also in the target character set), set *TARGET_CTRL_CHAR - to the control character, and return non-zero. Otherwise, return - zero. */ -int target_char_to_control_char (int target_char, int *target_ctrl_char); - +const char *target_wide_charset (void); + +/* These values are used to specify the type of transliteration done + by convert_between_encodings. */ +enum transliterations + { + /* Error on failure to convert. */ + translit_none, + /* Transliterate to host char. */ + translit_char + }; + +/* Convert between two encodings. + + FROM is the name of the source encoding. + TO is the name of the target encoding. + BYTES holds the bytes to convert; this is assumed to be characters + in the target encoding. 
+ NUM_BYTES is the number of bytes. + WIDTH is the width of a character from the FROM charset, in bytes. + For a variable width encoding, WIDTH should be the size of a "base + character". + OUTPUT is an obstack where the converted data is written. The + caller is responsible for initializing the obstack, and for + destroying the obstack should an error occur. + TRANSLIT specifies how invalid conversions should be handled. */ +void convert_between_encodings (const char *from, const char *to, + const gdb_byte *bytes, unsigned int num_bytes, + int width, struct obstack *output, + enum transliterations translit); + + +/* These values are used by wchar_iterate to report errors. */ +enum wchar_iterate_result + { + /* Ordinary return. */ + wchar_iterate_ok, + /* Invalid input sequence. */ + wchar_iterate_invalid, + /* Incomplete input sequence at the end of the input. */ + wchar_iterate_incomplete, + /* EOF. */ + wchar_iterate_eof + }; + +/* Declaration of the opaque wchar iterator type. */ +struct wchar_iterator; + +/* Create a new character iterator which returns wchar_t's. INPUT is + the input buffer. BYTES is the number of bytes in the input + buffer. CHARSET is the name of the character set in which INPUT is + encoded. WIDTH is the number of bytes in a base character of + CHARSET. + + This function either returns a new character set iterator, or calls + error. The result can be freed using a cleanup; see + make_cleanup_wchar_iterator. */ +struct wchar_iterator *make_wchar_iterator (const gdb_byte *input, size_t bytes, + const char *charset, + size_t width); + +/* Return a new cleanup suitable for destroying the wchar iterator + ITER. */ +struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter); + +/* Perform a single iteration of a wchar_t iterator. + + Returns the number of characters converted. A negative result + means that EOF has been reached. 
A positive result indicates the + number of valid wchar_ts in the result; *OUT_CHARS is updated to + point to the first valid character. + + In all cases aside from EOF, *PTR is set to point to the first + converted target byte. *LEN is set to the number of bytes + converted. + + A zero result means one of several unusual results. *OUT_RESULT is + set to indicate the type of un-ordinary return. + + wchar_iterate_invalid means that an invalid input character was + seen. The iterator is advanced by WIDTH (the argument to + make_wchar_iterator) bytes. + + wchar_iterate_incomplete means that an incomplete character was + seen at the end of the input sequence. + + wchar_iterate_eof means that all bytes were successfully + converted. The other output arguments are not set. */ +int wchar_iterate (struct wchar_iterator *iter, + enum wchar_iterate_result *out_result, + gdb_wchar_t **out_chars, + const gdb_byte **ptr, size_t *len); + + + +/* GDB needs to know a few details of its execution character set. + This knowledge is isolated here and in charset.c. */ + +/* The escape character. */ +#define HOST_ESCAPE_CHAR 27 + +/* Convert a letter, like 'c', to its corresponding control + character. */ +char host_letter_to_control_character (char c); + +/* Convert a hex digit character to its numeric value. E.g., 'f' is + converted to 15. This function assumes that C is a valid hex + digit. Both upper- and lower-case letters are recognized. */ +int host_hex_value (char c); #endif /* CHARSET_H */ diff --git a/gdb/config.in b/gdb/config.in index 6aaf77a..247e206 100644 --- a/gdb/config.in +++ b/gdb/config.in @@ -169,12 +169,18 @@ /* Define if you have the iconv() function. */ #undef HAVE_ICONV +/* Define to 1 if you have the `iconvlist' function. */ +#undef HAVE_ICONVLIST + /* Define if your compiler supports the #include_next directive. */ #undef HAVE_INCLUDE_NEXT /* Define to 1 if you have the <inttypes.h> header file. 
*/ #undef HAVE_INTTYPES_H +/* Define if you have <langinfo.h> and nl_langinfo(CODESET). */ +#undef HAVE_LANGINFO_CODESET + /* Define if your <locale.h> file defines LC_MESSAGES. */ #undef HAVE_LC_MESSAGES diff --git a/gdb/configure b/gdb/configure index 7579c84..87eef71 100755 --- a/gdb/configure +++ b/gdb/configure @@ -314,7 +314,7 @@ ac_subdirs_all="$ac_subdirs_all doc testsuite" ac_subdirs_all="$ac_subdirs_all gdbtk" ac_subdirs_all="$ac_subdirs_all multi-ice" ac_subdirs_all="$ac_subdirs_all gdbserver" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS MAINTAINER_MODE_TRUE MAINTAINER_MODE_FALSE MAINT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP RANLIB ac_ct_RANLIB build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os am__leading_dot DEPDIR CCDEPMODE MAKE GMAKE_TRUE GMAKE_FALSE SET_MAKE USE_NLS LIBINTL LIBINTL_DEP INCINTL XGETTEXT GMSGFMT POSUB CATALOGS DATADIRNAME INSTOBJEXT GENCAT CATOBJEXT localedir GL_COND_LIBTOOL_TRUE GL_COND_LIBTOOL_FALSE GNULIB_MEMMEM GNULIB_MEMPCPY GNULIB_MEMRCHR GNULIB_STPCPY GNULIB_STPNCPY GNULIB_STRCHRNUL GNULIB_STRDUP GNULIB_STRNDUP GNULIB_STRNLEN GNULIB_STRPBRK GNULIB_STRSEP GNULIB_STRSTR GNULIB_STRCASESTR GNULIB_STRTOK_R GNULIB_MBSLEN GNULIB_MBSNLEN GNULIB_MBSCHR GNULIB_MBSRCHR GNULIB_MBSSTR GNULIB_MBSCASECMP GNULIB_MBSNCASECMP GNULIB_MBSPCASECMP GNULIB_MBSCASESTR GNULIB_MBSCSPN GNULIB_MBSPBRK GNULIB_MBSSPN GNULIB_MBSSEP GNULIB_MBSTOK_R GNULIB_STRERROR GNULIB_STRSIGNAL HAVE_DECL_MEMMEM HAVE_MEMPCPY HAVE_DECL_MEMRCHR HAVE_STPCPY HAVE_STPNCPY HAVE_STRCHRNUL HAVE_DECL_STRDUP HAVE_STRNDUP HAVE_DECL_STRNDUP HAVE_DECL_STRNLEN HAVE_STRPBRK HAVE_STRSEP HAVE_STRCASESTR 
HAVE_DECL_STRTOK_R HAVE_DECL_STRERROR HAVE_DECL_STRSIGNAL REPLACE_STRERROR REPLACE_STRSIGNAL REPLACE_MEMMEM REPLACE_STRCASESTR REPLACE_STRSTR HAVE_LONG_LONG_INT HAVE_UNSIGNED_LONG_LONG_INT HAVE_INTTYPES_H HAVE_SYS_TYPES_H INCLUDE_NEXT NEXT_STDINT_H HAVE_STDINT_H HAVE_SYS_INTTYPES_H HAVE_SYS_BITYPES_H BITSIZEOF_PTRDIFF_T BITSIZEOF_SIG_ATOMIC_T BITSIZEOF_SIZE_T BITSIZEOF_WCHAR_T BITSIZEOF_WINT_T HAVE_SIGNED_SIG_ATOMIC_T HAVE_SIGNED_WCHAR_T HAVE_SIGNED_WINT_T PTRDIFF_T_SUFFIX SIG_ATOMIC_T_SUFFIX SIZE_T_SUFFIX WCHAR_T_SUFFIX WINT_T_SUFFIX STDINT_H NEXT_STRING_H GNULIB_WCWIDTH HAVE_DECL_WCWIDTH REPLACE_WCWIDTH WCHAR_H HAVE_WCHAR_H NEXT_WCHAR_H LIBGNU_LIBDEPS LIBGNU_LTLIBDEPS GNULIB_STDINT_H PACKAGE INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK AMTAR am__tar am__untar am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH am__fastdepCC_TRUE am__fastdepCC_FALSE subdirs TARGET_OBS PKGVERSION REPORT_BUGS_TO REPORT_BUGS_TEXI LN_S YACC AR ac_ct_AR DLLTOOL ac_ct_DLLTOOL WINDRES ac_ct_WINDRES MIG ac_ct_MIG READLINE READLINE_DEPS READLINE_CFLAGS HAVE_LIBEXPAT LIBEXPAT LTLIBEXPAT PYTHON_CFLAGS ALLOCA CONFIG_LDFLAGS TARGET_SYSTEM_ROOT TARGET_SYSTEM_ROOT_DEFINE WARN_CFLAGS WERROR_CFLAGS SER_HARDWIRE WIN32LIBS LIBGUI GUI_CFLAGS_X WIN32LDAPP TCL_VERSION TCL_PATCH_LEVEL TCL_BIN_DIR TCL_SRC_DIR TCL_LIB_FILE TCL_LIB_FLAG TCL_LIB_SPEC TCL_STUB_LIB_FILE TCL_STUB_LIB_FLAG TCL_STUB_LIB_SPEC TCL_INCLUDE TCL_LIBRARY TCL_DEPS TK_VERSION TK_BIN_DIR TK_SRC_DIR TK_LIB_FILE TK_LIB_FLAG TK_LIB_SPEC TK_STUB_LIB_FILE TK_STUB_LIB_FLAG TK_STUB_LIB_SPEC TK_INCLUDE TK_LIBRARY TK_DEPS TK_XINCLUDES X_CFLAGS X_LDFLAGS X_LIBS GDBTKLIBS GDBTK_CFLAGS GDBTK_SRC_DIR SIM SIM_OBS ENABLE_CFLAGS PROFILE_CFLAGS CONFIG_OBS CONFIG_DEPS CONFIG_SRCS CONFIG_ALL CONFIG_CLEAN CONFIG_INSTALL CONFIG_UNINSTALL target_subdir frags nm_h LIBICONV LIBOBJS LTLIBOBJS gl_LIBOBJS gl_LTLIBOBJS 
gltests_LIBOBJS gltests_LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS MAINTAINER_MODE_TRUE MAINTAINER_MODE_FALSE MAINT CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP RANLIB ac_ct_RANLIB build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os am__leading_dot DEPDIR CCDEPMODE MAKE GMAKE_TRUE GMAKE_FALSE SET_MAKE USE_NLS LIBINTL LIBINTL_DEP INCINTL XGETTEXT GMSGFMT POSUB CATALOGS DATADIRNAME INSTOBJEXT GENCAT CATOBJEXT localedir GL_COND_LIBTOOL_TRUE GL_COND_LIBTOOL_FALSE GNULIB_MEMMEM GNULIB_MEMPCPY GNULIB_MEMRCHR GNULIB_STPCPY GNULIB_STPNCPY GNULIB_STRCHRNUL GNULIB_STRDUP GNULIB_STRNDUP GNULIB_STRNLEN GNULIB_STRPBRK GNULIB_STRSEP GNULIB_STRSTR GNULIB_STRCASESTR GNULIB_STRTOK_R GNULIB_MBSLEN GNULIB_MBSNLEN GNULIB_MBSCHR GNULIB_MBSRCHR GNULIB_MBSSTR GNULIB_MBSCASECMP GNULIB_MBSNCASECMP GNULIB_MBSPCASECMP GNULIB_MBSCASESTR GNULIB_MBSCSPN GNULIB_MBSPBRK GNULIB_MBSSPN GNULIB_MBSSEP GNULIB_MBSTOK_R GNULIB_STRERROR GNULIB_STRSIGNAL HAVE_DECL_MEMMEM HAVE_MEMPCPY HAVE_DECL_MEMRCHR HAVE_STPCPY HAVE_STPNCPY HAVE_STRCHRNUL HAVE_DECL_STRDUP HAVE_STRNDUP HAVE_DECL_STRNDUP HAVE_DECL_STRNLEN HAVE_STRPBRK HAVE_STRSEP HAVE_STRCASESTR HAVE_DECL_STRTOK_R HAVE_DECL_STRERROR HAVE_DECL_STRSIGNAL REPLACE_STRERROR REPLACE_STRSIGNAL REPLACE_MEMMEM REPLACE_STRCASESTR REPLACE_STRSTR HAVE_LONG_LONG_INT HAVE_UNSIGNED_LONG_LONG_INT HAVE_INTTYPES_H HAVE_SYS_TYPES_H INCLUDE_NEXT NEXT_STDINT_H HAVE_STDINT_H HAVE_SYS_INTTYPES_H HAVE_SYS_BITYPES_H BITSIZEOF_PTRDIFF_T BITSIZEOF_SIG_ATOMIC_T BITSIZEOF_SIZE_T BITSIZEOF_WCHAR_T BITSIZEOF_WINT_T HAVE_SIGNED_SIG_ATOMIC_T HAVE_SIGNED_WCHAR_T HAVE_SIGNED_WINT_T PTRDIFF_T_SUFFIX 
SIG_ATOMIC_T_SUFFIX SIZE_T_SUFFIX WCHAR_T_SUFFIX WINT_T_SUFFIX STDINT_H NEXT_STRING_H GNULIB_WCWIDTH HAVE_DECL_WCWIDTH REPLACE_WCWIDTH WCHAR_H HAVE_WCHAR_H NEXT_WCHAR_H LIBGNU_LIBDEPS LIBGNU_LTLIBDEPS GNULIB_STDINT_H PACKAGE INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK AMTAR am__tar am__untar am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH am__fastdepCC_TRUE am__fastdepCC_FALSE subdirs TARGET_OBS PKGVERSION REPORT_BUGS_TO REPORT_BUGS_TEXI LN_S YACC AR ac_ct_AR DLLTOOL ac_ct_DLLTOOL WINDRES ac_ct_WINDRES MIG ac_ct_MIG LIBICONV LIBICONV_INCLUDE LIBICONV_LIBDIR READLINE READLINE_DEPS READLINE_CFLAGS HAVE_LIBEXPAT LIBEXPAT LTLIBEXPAT PYTHON_CFLAGS ALLOCA CONFIG_LDFLAGS TARGET_SYSTEM_ROOT TARGET_SYSTEM_ROOT_DEFINE WARN_CFLAGS WERROR_CFLAGS SER_HARDWIRE WIN32LIBS LIBGUI GUI_CFLAGS_X WIN32LDAPP TCL_VERSION TCL_PATCH_LEVEL TCL_BIN_DIR TCL_SRC_DIR TCL_LIB_FILE TCL_LIB_FLAG TCL_LIB_SPEC TCL_STUB_LIB_FILE TCL_STUB_LIB_FLAG TCL_STUB_LIB_SPEC TCL_INCLUDE TCL_LIBRARY TCL_DEPS TK_VERSION TK_BIN_DIR TK_SRC_DIR TK_LIB_FILE TK_LIB_FLAG TK_LIB_SPEC TK_STUB_LIB_FILE TK_STUB_LIB_FLAG TK_STUB_LIB_SPEC TK_INCLUDE TK_LIBRARY TK_DEPS TK_XINCLUDES X_CFLAGS X_LDFLAGS X_LIBS GDBTKLIBS GDBTK_CFLAGS GDBTK_SRC_DIR SIM SIM_OBS ENABLE_CFLAGS PROFILE_CFLAGS CONFIG_OBS CONFIG_DEPS CONFIG_SRCS CONFIG_ALL CONFIG_CLEAN CONFIG_INSTALL CONFIG_UNINSTALL target_subdir frags nm_h LIBOBJS LTLIBOBJS gl_LIBOBJS gl_LTLIBOBJS gltests_LIBOBJS gltests_LTLIBOBJS' ac_subst_files='host_makefile_frag' ac_pwd=`pwd` @@ -885,6 +885,7 @@ Optional Packages: --with-libunwind Use libunwind frame unwinding support --with-pkgversion=PKG Use PKG in the version string in place of "GDB" --with-bugurl=URL Direct users to URL to report a bug + --with-libiconv-prefix=DIR search for libiconv in DIR/include and DIR/lib --with-system-readline use installed readline library --with-expat include expat 
support (auto/yes/no) --with-gnu-ld assume the C compiler uses GNU ld default=no @@ -899,7 +900,6 @@ Optional Packages: --with-tcl directory containing tcl configuration (tclConfig.sh) --with-tk directory containing tk configuration (tkConfig.sh) --with-x use the X Window System - --with-libiconv-prefix=DIR search for libiconv in DIR/include and DIR/lib Some influential environment variables: CC C compiler command @@ -9989,6 +9989,296 @@ if test "$ac_cv_search_dlgetmodinfo" != no; then fi + + + +# Check whether --with-libiconv-prefix or --without-libiconv-prefix was given. +if test "${with_libiconv_prefix+set}" = set; then + withval="$with_libiconv_prefix" + + for dir in `echo "$withval" | tr : ' '`; do + if test -d $dir/include; then LIBICONV_INCLUDE="-I$dir/include"; CPPFLAGS="$CPPFLAGS -I$dir/include"; fi + if test -d $dir/lib; then LIBICONV_LIBDIR="-L$dir/lib"; LDFLAGS="$LDFLAGS -L$dir/lib"; fi + done + +fi; + + BUILD_LIBICONV_LIBDIR="-L../libiconv/lib/.libs -L../libiconv/lib/_libs" + BUILD_LIBICONV_INCLUDE="-I../libiconv/include" + + echo "$as_me:$LINENO: checking for iconv" >&5 +echo $ECHO_N "checking for iconv... $ECHO_C" >&6 +if test "${am_cv_func_iconv+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + + am_cv_func_iconv="no, consider installing GNU libiconv" + am_cv_lib_iconv=no + am_cv_use_build_libiconv=no + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdlib.h> +#include <iconv.h> +int +main () +{ +iconv_t cd = iconv_open("",""); + iconv(cd,NULL,NULL,NULL,NULL); + iconv_close(cd); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + am_cv_func_iconv=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test "$am_cv_func_iconv" != yes; then + am_save_LIBS="$LIBS" + LIBS="$LIBS -liconv" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdlib.h> +#include <iconv.h> +int +main () +{ +iconv_t cd = iconv_open("",""); + iconv(cd,NULL,NULL,NULL,NULL); + iconv_close(cd); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + am_cv_lib_iconv=yes + am_cv_func_iconv=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$am_save_LIBS" + fi + # Look for libiconv in the build tree. + if test "$am_cv_func_iconv" != yes && test -d ../libiconv; then + am_save_LIBS="$LIBS" + am_save_CPPFLAGS="$CPPFLAGS" + LIBS="$LIBS $BUILD_LIBICONV_LIBDIR -liconv" + CPPFLAGS="$CPPFLAGS $BUILD_LIBICONV_INCLUDE" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdlib.h> +#include <iconv.h> +int +main () +{ +iconv_t cd = iconv_open("",""); + iconv(cd,NULL,NULL,NULL,NULL); + iconv_close(cd); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + am_cv_lib_iconv=yes + am_cv_func_iconv=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$am_save_LIBS" + if test "$am_cv_func_iconv" = "yes"; then + am_cv_use_build_libiconv=yes + else + CPPFLAGS="$am_save_CPPFLAGS" + fi + fi + +fi +echo "$as_me:$LINENO: result: $am_cv_func_iconv" >&5 +echo "${ECHO_T}$am_cv_func_iconv" >&6 + if test "$am_cv_func_iconv" = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ICONV 1 +_ACEOF + + echo "$as_me:$LINENO: checking for iconv declaration" >&5 +echo $ECHO_N "checking for iconv declaration... $ECHO_C" >&6 + if test "${am_cv_proto_iconv+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +#include <stdlib.h> +#include <iconv.h> +extern +#ifdef __cplusplus +"C" +#endif +#if defined(__STDC__) || defined(__cplusplus) +size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft); +#else +size_t iconv(); +#endif + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + am_cv_proto_iconv_arg1="" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +am_cv_proto_iconv_arg1="const" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);" +fi + + am_cv_proto_iconv=`echo "$am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'` + echo "$as_me:$LINENO: result: ${ac_t:- + }$am_cv_proto_iconv" >&5 +echo "${ECHO_T}${ac_t:- + }$am_cv_proto_iconv" >&6 + +cat >>confdefs.h <<_ACEOF +#define ICONV_CONST $am_cv_proto_iconv_arg1 +_ACEOF + + fi + LIBICONV= + if test "$am_cv_lib_iconv" = yes; then + LIBICONV="-liconv" + fi + if test "$am_cv_use_build_libiconv" = yes; then + LIBICONV_LIBDIR="$BUILD_LIBICONV_LIBDIR" + LIBICONV_INCLUDE="$BUILD_LIBICONV_INCLUDE" + fi + + + + + # On alpha-osf, it appears that libtermcap and libcurses are not compatible. # There is a very specific comment in /usr/include/curses.h explaining that # termcap routines built into libcurses must not be used. @@ -15445,10 +15735,11 @@ fi + for ac_func in canonicalize_file_name realpath getrusage getuid \ getgid poll pread64 sbrk setpgid setpgrp setsid \ sigaction sigprocmask sigsetmask socketpair syscall \ - ttrace wborder setlocale + ttrace wborder setlocale iconvlist do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` echo "$as_me:$LINENO: checking for $ac_func" >&5 @@ -15550,6 +15841,70 @@ fi done + echo "$as_me:$LINENO: checking for nl_langinfo and CODESET" >&5 +echo $ECHO_N "checking for nl_langinfo and CODESET... $ECHO_C" >&6 +if test "${am_cv_langinfo_codeset+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include <langinfo.h> +int +main () +{ +char* cs = nl_langinfo(CODESET); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + am_cv_langinfo_codeset=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +am_cv_langinfo_codeset=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + +fi +echo "$as_me:$LINENO: result: $am_cv_langinfo_codeset" >&5 +echo "${ECHO_T}$am_cv_langinfo_codeset" >&6 + if test $am_cv_langinfo_codeset = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LANGINFO_CODESET 1 +_ACEOF + + fi + + # Check the return and argument types of ptrace. No canned test for # this, so roll our own. gdb_ptrace_headers=' @@ -20776,229 +21131,10 @@ done - -cat >>confdefs.h <<\_ACEOF -#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" -_ACEOF - - - - - -# Check whether --with-libiconv-prefix or --without-libiconv-prefix was given. -if test "${with_libiconv_prefix+set}" = set; then - withval="$with_libiconv_prefix" - - for dir in `echo "$withval" | tr : ' '`; do - if test -d $dir/include; then CPPFLAGS="$CPPFLAGS -I$dir/include"; fi - if test -d $dir/lib; then LDFLAGS="$LDFLAGS -L$dir/lib"; fi - done - -fi; - - echo "$as_me:$LINENO: checking for iconv" >&5 -echo $ECHO_N "checking for iconv... 
$ECHO_C" >&6 -if test "${am_cv_func_iconv+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - - am_cv_func_iconv="no, consider installing GNU libiconv" - am_cv_lib_iconv=no - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <stdlib.h> -#include <iconv.h> -int -main () -{ -iconv_t cd = iconv_open("",""); - iconv(cd,NULL,NULL,NULL,NULL); - iconv_close(cd); - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 - (eval $ac_link) 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" - || test ! -s conftest.err' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest$ac_exeext' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - am_cv_func_iconv=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -fi -rm -f conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - if test "$am_cv_func_iconv" != yes; then - am_save_LIBS="$LIBS" - LIBS="$LIBS -liconv" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <stdlib.h> -#include <iconv.h> -int -main () -{ -iconv_t cd = iconv_open("",""); - iconv(cd,NULL,NULL,NULL,NULL); - iconv_close(cd); - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 - (eval $ac_link) 2>conftest.er1 - ac_status=$? 
- grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" - || test ! -s conftest.err' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest$ac_exeext' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - am_cv_lib_iconv=yes - am_cv_func_iconv=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -fi -rm -f conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext - LIBS="$am_save_LIBS" - fi - -fi -echo "$as_me:$LINENO: result: $am_cv_func_iconv" >&5 -echo "${ECHO_T}$am_cv_func_iconv" >&6 - if test "$am_cv_func_iconv" = yes; then - cat >>confdefs.h <<\_ACEOF -#define HAVE_ICONV 1 +#define GDB_DEFAULT_HOST_CHARSET "UTF-8" _ACEOF - echo "$as_me:$LINENO: checking for iconv declaration" >&5 -echo $ECHO_N "checking for iconv declaration... $ECHO_C" >&6 - if test "${am_cv_proto_iconv+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -#include <stdlib.h> -#include <iconv.h> -extern -#ifdef __cplusplus -"C" -#endif -#if defined(__STDC__) || defined(__cplusplus) -size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft); -#else -size_t iconv(); -#endif - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 - (eval $ac_compile) 2>conftest.er1 - ac_status=$? 
- grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" - || test ! -s conftest.err' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - am_cv_proto_iconv_arg1="" -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -am_cv_proto_iconv_arg1="const" -fi -rm -f conftest.err conftest.$ac_objext conftest.$ac_ext - am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);" -fi - - am_cv_proto_iconv=`echo "$am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'` - echo "$as_me:$LINENO: result: ${ac_t:- - }$am_cv_proto_iconv" >&5 -echo "${ECHO_T}${ac_t:- - }$am_cv_proto_iconv" >&6 - -cat >>confdefs.h <<_ACEOF -#define ICONV_CONST $am_cv_proto_iconv_arg1 -_ACEOF - - fi - LIBICONV= - if test "$am_cv_lib_iconv" = yes; then - LIBICONV="-liconv" - fi - - ac_config_files="$ac_config_files Makefile .gdbinit:gdbinit.in gnulib/Makefile" ac_config_commands="$ac_config_commands default" @@ -21880,6 +22016,9 @@ s,@WINDRES@,$WINDRES,;t t s,@ac_ct_WINDRES@,$ac_ct_WINDRES,;t t s,@MIG@,$MIG,;t t s,@ac_ct_MIG@,$ac_ct_MIG,;t t +s,@LIBICONV@,$LIBICONV,;t t +s,@LIBICONV_INCLUDE@,$LIBICONV_INCLUDE,;t t +s,@LIBICONV_LIBDIR@,$LIBICONV_LIBDIR,;t t s,@READLINE@,$READLINE,;t t s,@READLINE_DEPS@,$READLINE_DEPS,;t t s,@READLINE_CFLAGS@,$READLINE_CFLAGS,;t t @@ -21944,7 +22083,6 @@ s,@CONFIG_UNINSTALL@,$CONFIG_UNINSTALL,;t t s,@target_subdir@,$target_subdir,;t t s,@frags@,$frags,;t t s,@nm_h@,$nm_h,;t t -s,@LIBICONV@,$LIBICONV,;t 
t s,@LIBOBJS@,$LIBOBJS,;t t s,@LTLIBOBJS@,$LTLIBOBJS,;t t s,@gl_LIBOBJS@,$gl_LIBOBJS,;t t diff --git a/gdb/configure.ac b/gdb/configure.ac index 3f81ff2..afc006d 100644 --- a/gdb/configure.ac +++ b/gdb/configure.ac @@ -1,6 +1,6 @@ dnl Autoconf configure script for GDB, the GNU debugger. dnl Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, -dnl 2005, 2006, 2007, 2008 +dnl 2005, 2006, 2007, 2008, 2009 dnl Free Software Foundation, Inc. dnl dnl This file is part of GDB. @@ -430,6 +430,8 @@ AC_SEARCH_LIBS(zlibVersion, z, [AC_CHECK_HEADERS(zlib.h)]) # On HP/UX we may need libxpdl for dlgetmodinfo (used by solib-pa64.c). AC_SEARCH_LIBS(dlgetmodinfo, [dl xpdl]) +AM_ICONV + # On alpha-osf, it appears that libtermcap and libcurses are not compatible. # There is a very specific comment in /usr/include/curses.h explaining that # termcap routines built into libcurses must not be used. @@ -776,7 +778,8 @@ AC_FUNC_VFORK AC_CHECK_FUNCS([canonicalize_file_name realpath getrusage getuid \ getgid poll pread64 sbrk setpgid setpgrp setsid \ sigaction sigprocmask sigsetmask socketpair syscall \ - ttrace wborder setlocale]) + ttrace wborder setlocale iconvlist]) +AM_LANGINFO_CODESET # Check the return and argument types of ptrace. No canned test for # this, so roll our own. @@ -1930,17 +1933,10 @@ dnl Check for exe extension set on certain hosts (e.g. Win32) AC_EXEEXT dnl Detect the character set used by this host. - -dnl At the moment, we just assume it's ISO-8859-1 (which is a -dnl superset of ASCII containing the characters needed for French, -dnl German, Spanish, Italian, and possibly others), but if were -dnl *were* to support any host character sets other than ISO-8859-1, -dnl here's where we'd detect it. -AC_DEFINE(GDB_DEFAULT_HOST_CHARSET, "ISO-8859-1", +dnl At the moment, we just assume it's UTF-8. 
+AC_DEFINE(GDB_DEFAULT_HOST_CHARSET, "UTF-8", [Define to be a string naming the default host character set.]) -AM_ICONV - AC_OUTPUT(Makefile .gdbinit:gdbinit.in gnulib/Makefile, [ dnl Autoconf doesn't provide a mechanism for modifying definitions @@ -61,6 +61,8 @@ #include "gdb_locale.h" +#include "gdb_wchar.h" + /* For ``enum target_signal''. */ #include "gdb/signals.h" diff --git a/gdb/doc/ChangeLog b/gdb/doc/ChangeLog index ff25386..f6e1f57 100644 --- a/gdb/doc/ChangeLog +++ b/gdb/doc/ChangeLog @@ -1,3 +1,9 @@ +2009-03-20 Tom Tromey <tromey@redhat.com> + + * gdb.texinfo (Character Sets): Remove obsolete text. Document + set target-wide-charset. + (Requirements): Mention iconv. + 2009-03-17 Hui Zhu <teawater@gmail.com> * gdb.texinfo: Change the introduce of "disassemble-next-line". diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo index 044e1ad..af577e7 100644 --- a/gdb/doc/gdb.texinfo +++ b/gdb/doc/gdb.texinfo @@ -7959,13 +7959,17 @@ support: @table @code @item set target-charset @var{charset} @kindex set target-charset -Set the current target character set to @var{charset}. We list the -character set names @value{GDBN} recognizes below, but if you type -@code{set target-charset} followed by @key{TAB}@key{TAB}, @value{GDBN} will -list the target character sets it supports. -@end table +Set the current target character set to @var{charset}. If you type +@code{set target-charset} followed by @key{TAB}@key{TAB}, @value{GDBN} +will list the target character sets it supports. + +@item set target-wide-charset @var{charset} +@kindex set target-wide-charset +Set the current target wide character set to @var{charset}. The +target wide character set is the character set used by @code{wchar_t}. +If you type @code{set target-wide-charset} followed by @key{TAB}@key{TAB}, +@value{GDBN} will list the target character sets it supports. -@table @code @item set host-charset @var{charset} @kindex set host-charset Set the current host character set to @var{charset}. 
@@ -7975,10 +7979,9 @@ system it is running on; you can override that default using the @code{set host-charset} command. @value{GDBN} can only use certain character sets as its host character -set. We list the character set names @value{GDBN} recognizes below, and -indicate which can be host character sets, but if you type -@code{set target-charset} followed by @key{TAB}@key{TAB}, @value{GDBN} will -list the host character sets it supports. +set. If you type @code{set host-charset} followed by +@key{TAB}@key{TAB}, @value{GDBN} will list the host character sets it +supports. @item set charset @var{charset} @kindex set charset @@ -8002,37 +8005,6 @@ Show the name of the current target charset. @end table -@value{GDBN} currently includes support for the following character -sets: - -@table @code - -@item ASCII -@cindex ASCII character set -Seven-bit U.S. @sc{ascii}. @value{GDBN} can use this as its host -character set. - -@item ISO-8859-1 -@cindex ISO 8859-1 character set -@cindex ISO Latin 1 character set -The ISO Latin 1 character set. This extends @sc{ascii} with accented -characters needed for French, German, and Spanish. @value{GDBN} can use -this as its host character set. - -@item EBCDIC-US -@itemx IBM1047 -@cindex EBCDIC character set -@cindex IBM1047 character set -Variants of the @sc{ebcdic} character set, used on some of IBM's -mainframe operating systems. (@sc{gnu}/Linux on the S/390 uses U.S. @sc{ascii}.) -@value{GDBN} cannot use these as its host character set. - -@end table - -Note that these are all single-byte character sets. More work inside -@value{GDBN} is needed to support multi-byte or variable-width character -encodings, like the UTF-8 and UCS-2 encodings of Unicode. - Here is an example of @value{GDBN}'s character set support in action. 
Assume that the following source code has been placed in the file @file{charset-test.c}: @@ -24826,6 +24798,24 @@ The @samp{zlib} library is likely included with your operating system distribution; if it is not, you can get the latest version from @url{http://zlib.net}. +@item iconv +@value{GDBN}'s features related to character sets (@pxref{Character +Sets}) require a functioning @code{iconv} implementation. If you are +on a GNU system, then this is provided by the GNU C Library. Some +other systems also provide a working @code{iconv}. + +On systems without @code{iconv}, you can install GNU Libiconv. If you +have previously installed Libiconv, you can use the +@option{--with-libiconv-prefix} option to configure. + +@value{GDBN}'s top-level @file{configure} and @file{Makefile} will +arrange to build Libiconv if a directory named @file{libiconv} appears +in the top-most source directory. If Libiconv is built this way, and +if the operating system does not provide a suitable @code{iconv} +implementation, then the just-built library will automatically be used +by @value{GDBN}. One easy way to set this up is to download GNU +Libiconv, unpack it, and then rename the directory holding the +Libiconv source code to @samp{libiconv}. @end table @node Running Configure diff --git a/gdb/expprint.c b/gdb/expprint.c index d7fac86..6048a38 100644 --- a/gdb/expprint.c +++ b/gdb/expprint.c @@ -186,8 +186,8 @@ print_subexp_standard (struct expression *exp, int *pos, If necessary, we can temporarily set it to zero, or pass it as an additional parameter to LA_PRINT_STRING. 
-fnf */ get_user_print_options (&opts); - LA_PRINT_STRING (stream, &exp->elts[pc + 2].string, nargs, 1, 0, - &opts); + LA_PRINT_STRING (stream, builtin_type (exp->gdbarch)->builtin_char, + &exp->elts[pc + 2].string, nargs, 0, &opts); } return; @@ -205,8 +205,8 @@ print_subexp_standard (struct expression *exp, int *pos, (*pos) += 3 + BYTES_TO_EXP_ELEM (nargs + 1); fputs_filtered ("@\"", stream); get_user_print_options (&opts); - LA_PRINT_STRING (stream, &exp->elts[pc + 2].string, nargs, 1, 0, - &opts); + LA_PRINT_STRING (stream, builtin_type (exp->gdbarch)->builtin_char, + &exp->elts[pc + 2].string, nargs, 0, &opts); fputs_filtered ("\"", stream); } return; @@ -291,8 +291,8 @@ print_subexp_standard (struct expression *exp, int *pos, { struct value_print_options opts; get_user_print_options (&opts); - LA_PRINT_STRING (stream, tempstr, nargs - 1, 1, 0, - &opts); + LA_PRINT_STRING (stream, builtin_type (exp->gdbarch)->builtin_char, + tempstr, nargs - 1, 0, &opts); (*pos) = pc; } else diff --git a/gdb/f-lang.c b/gdb/f-lang.c index 63598413..c60bfdb 100644 --- a/gdb/f-lang.c +++ b/gdb/f-lang.c @@ -70,8 +70,9 @@ static SAVED_F77_COMMON_PTR allocate_saved_f77_common_node (void); static void patch_common_entries (SAVED_F77_COMMON_PTR, CORE_ADDR, int); #endif -static void f_printchar (int c, struct ui_file * stream); -static void f_emit_char (int c, struct ui_file * stream, int quoter); +static void f_printchar (int c, struct type *type, struct ui_file * stream); +static void f_emit_char (int c, struct type *type, + struct ui_file * stream, int quoter); /* Print the character C on STREAM as part of the contents of a literal string whose delimiter is QUOTER. Note that that format for printing @@ -80,7 +81,7 @@ static void f_emit_char (int c, struct ui_file * stream, int quoter); be replaced with a true F77 version. 
*/ static void -f_emit_char (int c, struct ui_file *stream, int quoter) +f_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { c &= 0xFF; /* Avoid sign bit follies */ @@ -126,10 +127,10 @@ f_emit_char (int c, struct ui_file *stream, int quoter) be replaced with a true F77version. */ static void -f_printchar (int c, struct ui_file *stream) +f_printchar (int c, struct type *type, struct ui_file *stream) { fputs_filtered ("'", stream); - LA_EMIT_CHAR (c, stream, '\''); + LA_EMIT_CHAR (c, type, stream, '\''); fputs_filtered ("'", stream); } @@ -141,14 +142,15 @@ f_printchar (int c, struct ui_file *stream) be replaced with a true F77 version. */ static void -f_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +f_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); if (length == 0) { @@ -190,7 +192,7 @@ f_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("', ", stream); in_quotes = 0; } - f_printchar (string[i], stream); + f_printchar (string[i], type, stream); fprintf_filtered (stream, " <repeats %u times>", reps); i = rep1 - 1; things_printed += options->repeat_count_threshold; @@ -206,7 +208,7 @@ f_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("'", stream); in_quotes = 1; } - LA_EMIT_CHAR (string[i], stream, '"'); + LA_EMIT_CHAR (string[i], type, stream, '"'); ++things_printed; } } diff --git a/gdb/f-valprint.c b/gdb/f-valprint.c index 5721041..1ecc310 100644 --- a/gdb/f-valprint.c +++ b/gdb/f-valprint.c @@ -256,7 +256,8 @@ f_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, { case TYPE_CODE_STRING: f77_get_dynamic_length_of_aggregate (type); - LA_PRINT_STRING (stream, valaddr, 
TYPE_LENGTH (type), 1, 0, options); + LA_PRINT_STRING (stream, builtin_type (current_gdbarch)->builtin_char, + valaddr, TYPE_LENGTH (type), 0, options); break; case TYPE_CODE_ARRAY: @@ -293,7 +294,7 @@ f_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, && TYPE_CODE (elttype) == TYPE_CODE_INT && (options->format == 0 || options->format == 's') && addr != 0) - i = val_print_string (addr, -1, TYPE_LENGTH (elttype), stream, + i = val_print_string (TYPE_TARGET_TYPE (type), addr, -1, stream, options); /* Return number of characters printed, including the terminating @@ -365,7 +366,7 @@ f_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, { fputs_filtered (" ", stream); LA_PRINT_CHAR ((unsigned char) unpack_long (type, valaddr), - stream); + type, stream); } } break; diff --git a/gdb/gdb_locale.h b/gdb/gdb_locale.h index e8ba0ea..4fa4d3d 100644 --- a/gdb/gdb_locale.h +++ b/gdb/gdb_locale.h @@ -41,4 +41,8 @@ # define N_(String) (String) #endif +#ifdef HAVE_LANGINFO_CODESET +#include <langinfo.h> +#endif + #endif /* GDB_LOCALE_H */ diff --git a/gdb/gdb_obstack.h b/gdb/gdb_obstack.h index 48f49cd..757d8e9 100644 --- a/gdb/gdb_obstack.h +++ b/gdb/gdb_obstack.h @@ -45,4 +45,7 @@ #define obstack_grow_str0(OBSTACK,STRING) \ obstack_grow0 (OBSTACK, STRING, strlen (STRING)) +#define obstack_grow_wstr(OBSTACK, WSTRING) \ + obstack_grow (OBSTACK, WSTRING, sizeof (gdb_wchar_t) * gdb_wcslen (WSTRING)) + #endif diff --git a/gdb/gdb_wchar.h b/gdb/gdb_wchar.h new file mode 100644 index 0000000..583140e --- /dev/null +++ b/gdb/gdb_wchar.h @@ -0,0 +1,62 @@ +/* Wide characters for gdb + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef GDB_WCHAR_H +#define GDB_WCHAR_H + +/* If this host has wchar_t and if iconv is available (perhaps via GNU + libiconv), then we arrange to use those. Otherwise, we provide a + phony iconv which only handles a single character set, and we + provide wrappers for the wchar_t functionality we use. */ +#if defined(HAVE_ICONV) && defined(HAVE_WCHAR_H) + +#include <iconv.h> +#include <wchar.h> +#include <wctype.h> + +typedef wchar_t gdb_wchar_t; +typedef wint_t gdb_wint_t; + +#define gdb_wcslen wcslen +#define gdb_iswprint iswprint +#define gdb_iswdigit iswdigit +#define gdb_btowc btowc +#define gdb_WEOF WEOF + +#define LCST(X) L ## X + +#else + +typedef char gdb_wchar_t; +typedef int gdb_wint_t; + +#define gdb_wcslen strlen +#define gdb_iswprint isprint +#define gdb_iswdigit isdigit +#define gdb_btowc /* empty */ +#define gdb_WEOF EOF + +#define LCST(X) X + +/* This define is used elsewhere so we don't need to duplicate the + same checking logic in multiple places. 
*/ +#define PHONY_ICONV + +#endif /* defined(HAVE_ICONV) && defined(HAVE_WCHAR_H) */ + +#endif /* GDB_WCHAR_H */ diff --git a/gdb/jv-lang.c b/gdb/jv-lang.c index b702ebf..0d0f4bc 100644 --- a/gdb/jv-lang.c +++ b/gdb/jv-lang.c @@ -61,7 +61,8 @@ static char *get_java_utf8_name (struct obstack *obstack, struct value *name); static int java_class_is_primitive (struct value *clas); static struct value *java_value_string (char *ptr, int len); -static void java_emit_char (int c, struct ui_file * stream, int quoter); +static void java_emit_char (int c, struct type *type, + struct ui_file * stream, int quoter); static char *java_class_name_from_physname (const char *physname); @@ -796,7 +797,7 @@ java_value_string (char *ptr, int len) characters and strings is language specific. */ static void -java_emit_char (int c, struct ui_file *stream, int quoter) +java_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { switch (c) { diff --git a/gdb/jv-valprint.c b/gdb/jv-valprint.c index d3606fd..a1b8a71 100644 --- a/gdb/jv-valprint.c +++ b/gdb/jv-valprint.c @@ -230,7 +230,7 @@ java_value_print (struct value *val, struct ui_file *stream, value_free_to_mark (mark); /* Release unnecessary values */ - val_print_string (data + boffset, count, 2, stream, options); + val_print_string (java_char_type, data + boffset, count, stream, options); return 0; } @@ -520,7 +520,7 @@ java_val_print (struct type *type, const gdb_byte *valaddr, || (TYPE_CODE (type) == TYPE_CODE_INT && TYPE_LENGTH (type) == 2 && strcmp (TYPE_NAME (type), "char") == 0)) - LA_PRINT_CHAR ((int) unpack_long (type, valaddr), stream); + LA_PRINT_CHAR ((int) unpack_long (type, valaddr), type, stream); else val_print_type_code_int (type, valaddr, stream); break; diff --git a/gdb/language.c b/gdb/language.c index 3c37a64..6209d7f 100644 --- a/gdb/language.c +++ b/gdb/language.c @@ -65,9 +65,11 @@ static void set_check (char *, int); static void set_type_range_case (void); -static void unk_lang_emit_char 
(int c, struct ui_file *stream, int quoter); +static void unk_lang_emit_char (int c, struct type *type, + struct ui_file *stream, int quoter); -static void unk_lang_printchar (int c, struct ui_file *stream); +static void unk_lang_printchar (int c, struct type *type, + struct ui_file *stream); static void unk_lang_print_type (struct type *, char *, struct ui_file *, int, int); @@ -1065,20 +1067,22 @@ unk_lang_error (char *msg) } static void -unk_lang_emit_char (int c, struct ui_file *stream, int quoter) +unk_lang_emit_char (int c, struct type *type, struct ui_file *stream, + int quoter) { error (_("internal error - unimplemented function unk_lang_emit_char called.")); } static void -unk_lang_printchar (int c, struct ui_file *stream) +unk_lang_printchar (int c, struct type *type, struct ui_file *stream) { error (_("internal error - unimplemented function unk_lang_printchar called.")); } static void -unk_lang_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +unk_lang_printstr (struct ui_file *stream, struct type *type, + const gdb_byte *string, unsigned int length, + int force_ellipses, const struct value_print_options *options) { error (_("internal error - unimplemented function unk_lang_printstr called.")); diff --git a/gdb/language.h b/gdb/language.h index 85826fd..e5f80ab 100644 --- a/gdb/language.h +++ b/gdb/language.h @@ -186,14 +186,15 @@ struct language_defn void (*la_post_parser) (struct expression ** expp, int void_context_p); - void (*la_printchar) (int ch, struct ui_file * stream); + void (*la_printchar) (int ch, struct type *chtype, struct ui_file * stream); - void (*la_printstr) (struct ui_file * stream, const gdb_byte *string, - unsigned int length, int width, + void (*la_printstr) (struct ui_file * stream, struct type *elttype, + const gdb_byte *string, unsigned int length, int force_ellipses, const struct value_print_options *); - void (*la_emitchar) (int ch, struct ui_file * stream, int 
quoter); + void (*la_emitchar) (int ch, struct type *chtype, + struct ui_file * stream, int quoter); /* Print a type using syntax appropriate for this language. */ @@ -381,13 +382,13 @@ extern enum language set_language (enum language); #define LA_VALUE_PRINT(val,stream,options) \ (current_language->la_value_print(val,stream,options)) -#define LA_PRINT_CHAR(ch, stream) \ - (current_language->la_printchar(ch, stream)) -#define LA_PRINT_STRING(stream, string, length, width, force_ellipses,options) \ - (current_language->la_printstr(stream, string, length, width, \ +#define LA_PRINT_CHAR(ch, type, stream) \ + (current_language->la_printchar(ch, type, stream)) +#define LA_PRINT_STRING(stream, elttype, string, length, force_ellipses,options) \ + (current_language->la_printstr(stream, elttype, string, length, \ force_ellipses,options)) -#define LA_EMIT_CHAR(ch, stream, quoter) \ - (current_language->la_emitchar(ch, stream, quoter)) +#define LA_EMIT_CHAR(ch, type, stream, quoter) \ + (current_language->la_emitchar(ch, type, stream, quoter)) #define LA_GET_STRING(value, buffer, length, encoding) \ (current_language->la_get_string(value, buffer, length, encoding)) diff --git a/gdb/m2-lang.c b/gdb/m2-lang.c index 9e4bb1b..9ca3ae1 100644 --- a/gdb/m2-lang.c +++ b/gdb/m2-lang.c @@ -29,8 +29,8 @@ #include "valprint.h" extern void _initialize_m2_language (void); -static void m2_printchar (int, struct ui_file *); -static void m2_emit_char (int, struct ui_file *, int); +static void m2_printchar (int, struct type *, struct ui_file *); +static void m2_emit_char (int, struct type *, struct ui_file *, int); /* Print the character C on STREAM as part of the contents of a literal string whose delimiter is QUOTER. Note that that format for printing @@ -39,7 +39,7 @@ static void m2_emit_char (int, struct ui_file *, int); be replaced with a true Modula version. 
*/ static void -m2_emit_char (int c, struct ui_file *stream, int quoter) +m2_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { c &= 0xFF; /* Avoid sign bit follies */ @@ -88,10 +88,10 @@ m2_emit_char (int c, struct ui_file *stream, int quoter) be replaced with a true Modula version. */ static void -m2_printchar (int c, struct ui_file *stream) +m2_printchar (int c, struct type *type, struct ui_file *stream) { fputs_filtered ("'", stream); - LA_EMIT_CHAR (c, stream, '\''); + LA_EMIT_CHAR (c, type, stream, '\''); fputs_filtered ("'", stream); } @@ -103,14 +103,15 @@ m2_printchar (int c, struct ui_file *stream) be replaced with a true Modula version. */ static void -m2_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +m2_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); if (length == 0) { @@ -152,7 +153,7 @@ m2_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("\", ", stream); in_quotes = 0; } - m2_printchar (string[i], stream); + m2_printchar (string[i], type, stream); fprintf_filtered (stream, " <repeats %u times>", reps); i = rep1 - 1; things_printed += options->repeat_count_threshold; @@ -168,7 +169,7 @@ m2_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("\"", stream); in_quotes = 1; } - LA_EMIT_CHAR (string[i], stream, '"'); + LA_EMIT_CHAR (string[i], type, stream, '"'); ++things_printed; } } diff --git a/gdb/m2-valprint.c b/gdb/m2-valprint.c index 71c410c..41fb8fe 100644 --- a/gdb/m2-valprint.c +++ b/gdb/m2-valprint.c @@ -237,7 +237,8 @@ print_unpacked_pointer (struct type *type, && TYPE_CODE (elttype) == TYPE_CODE_INT && (options->format == 0 || options->format == 's') && addr != 0) - return 
val_print_string (addr, -1, TYPE_LENGTH (elttype), stream, options); + return val_print_string (TYPE_TARGET_TYPE (type), addr, -1, + stream, options); return 0; } @@ -294,7 +295,7 @@ m2_print_array_contents (struct type *type, const gdb_byte *valaddr, || ((current_language->la_language == language_m2) && (TYPE_CODE (type) == TYPE_CODE_CHAR))) && (options->format == 0 || options->format == 's')) - val_print_string (address, len+1, eltlen, stream, options); + val_print_string (type, address, len+1, stream, options); else { fprintf_filtered (stream, "{"); @@ -359,7 +360,8 @@ m2_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, len = temp_len; } - LA_PRINT_STRING (stream, valaddr + embedded_offset, len, 1, 0, + LA_PRINT_STRING (stream, TYPE_TARGET_TYPE (type), + valaddr + embedded_offset, len, 0, options); i = len; } @@ -547,7 +549,7 @@ m2_val_print (struct type *type, const gdb_byte *valaddr, int embedded_offset, else fprintf_filtered (stream, "%d", (int) val); fputs_filtered (" ", stream); - LA_PRINT_CHAR ((unsigned char) val, stream); + LA_PRINT_CHAR ((unsigned char) val, type, stream); } break; diff --git a/gdb/macroexp.c b/gdb/macroexp.c index f0a8c1f..752a939 100644 --- a/gdb/macroexp.c +++ b/gdb/macroexp.c @@ -23,6 +23,7 @@ #include "macrotab.h" #include "macroexp.h" #include "gdb_assert.h" +#include "c-lang.h" @@ -320,14 +321,17 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end) way GDB's C/C++ lexer does. So we call parse_escape in utils.c to handle escape sequences. 
*/ if ((p + 1 <= end && *p == '\'') - || (p + 2 <= end && p[0] == 'L' && p[1] == '\'')) + || (p + 2 <= end + && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U') + && p[1] == '\'')) { char *tok_start = p; char *body_start; + int char_count = 0; if (*p == '\'') p++; - else if (*p == 'L') + else if (*p == 'L' || *p == 'u' || *p == 'U') p += 2; else gdb_assert (0); @@ -339,7 +343,7 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end) error (_("Unmatched single quote.")); else if (*p == '\'') { - if (p == body_start) + if (!char_count) error (_("A character constant must contain at least one " "character.")); p++; @@ -348,10 +352,13 @@ get_character_constant (struct macro_buffer *tok, char *p, char *end) else if (*p == '\\') { p++; - parse_escape (&p); + char_count += c_parse_escape (&p, NULL); } else - p++; + { + p++; + char_count++; + } } set_token (tok, tok_start, p); @@ -370,16 +377,16 @@ static int get_string_literal (struct macro_buffer *tok, char *p, char *end) { if ((p + 1 <= end - && *p == '\"') + && *p == '"') || (p + 2 <= end - && p[0] == 'L' - && p[1] == '\"')) + && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U') + && p[1] == '"')) { char *tok_start = p; - if (*p == '\"') + if (*p == '"') p++; - else if (*p == 'L') + else if (*p == 'L' || *p == 'u' || *p == 'U') p += 2; else gdb_assert (0); @@ -388,7 +395,7 @@ get_string_literal (struct macro_buffer *tok, char *p, char *end) { if (p >= end) error (_("Unterminated string in expression.")); - else if (*p == '\"') + else if (*p == '"') { p++; break; @@ -399,7 +406,7 @@ get_string_literal (struct macro_buffer *tok, char *p, char *end) else if (*p == '\\') { p++; - parse_escape (&p); + c_parse_escape (&p, NULL); } else p++; diff --git a/gdb/objc-lang.c b/gdb/objc-lang.c index a6c74a3..9b8d801 100644 --- a/gdb/objc-lang.c +++ b/gdb/objc-lang.c @@ -280,7 +280,7 @@ objc_demangle (const char *mangled, int options) for printing characters and strings is language specific. 
*/ static void -objc_emit_char (int c, struct ui_file *stream, int quoter) +objc_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { c &= 0xFF; /* Avoid sign bit follies. */ @@ -326,10 +326,10 @@ objc_emit_char (int c, struct ui_file *stream, int quoter) } static void -objc_printchar (int c, struct ui_file *stream) +objc_printchar (int c, struct type *type, struct ui_file *stream) { fputs_filtered ("'", stream); - objc_emit_char (c, stream, '\''); + objc_emit_char (c, type, stream, '\''); fputs_filtered ("'", stream); } @@ -340,14 +340,16 @@ objc_printchar (int c, struct ui_file *stream) FORCE_ELLIPSES. */ static void -objc_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +objc_printstr (struct ui_file *stream, struct type *type, + const gdb_byte *string, unsigned int length, + int force_ellipses, const struct value_print_options *options) { unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); /* If the string was not truncated due to `set print elements', and the last byte of it is a null, we don't print that, in @@ -395,7 +397,7 @@ objc_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("\", ", stream); in_quotes = 0; } - objc_printchar (string[i], stream); + objc_printchar (string[i], type, stream); fprintf_filtered (stream, " <repeats %u times>", reps); i = rep1 - 1; things_printed += options->repeat_count_threshold; @@ -411,7 +413,7 @@ objc_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("\"", stream); in_quotes = 1; } - objc_emit_char (string[i], stream, '"'); + objc_emit_char (string[i], type, stream, '"'); ++things_printed; } } diff --git a/gdb/p-lang.c b/gdb/p-lang.c index 41da3e0..e743a6f 100644 --- a/gdb/p-lang.c +++ b/gdb/p-lang.c @@ -97,7 +97,8 @@ pascal_main_name (void) but this does not happen for Free Pascal nor for GPC. 
*/ int is_pascal_string_type (struct type *type,int *length_pos, - int *length_size, int *string_pos, int *char_size, + int *length_size, int *string_pos, + struct type **char_type, char **arrayname) { if (TYPE_CODE (type) == TYPE_CODE_STRUCT) @@ -114,8 +115,8 @@ is_pascal_string_type (struct type *type,int *length_pos, *length_size = TYPE_LENGTH (TYPE_FIELD_TYPE (type, 0)); if (string_pos) *string_pos = TYPE_FIELD_BITPOS (type, 1) / TARGET_CHAR_BIT; - if (char_size) - *char_size = 1; + if (char_type) + *char_type = TYPE_TARGET_TYPE (TYPE_FIELD_TYPE (type, 1)); if (arrayname) *arrayname = TYPE_FIELDS (type)[1].name; return 2; @@ -126,7 +127,6 @@ is_pascal_string_type (struct type *type,int *length_pos, && strcmp (TYPE_FIELDS (type)[0].name, "Capacity") == 0 && strcmp (TYPE_FIELDS (type)[1].name, "length") == 0) { - struct type *char_type; if (length_pos) *length_pos = TYPE_FIELD_BITPOS (type, 1) / TARGET_CHAR_BIT; if (length_size) @@ -134,13 +134,12 @@ is_pascal_string_type (struct type *type,int *length_pos, if (string_pos) *string_pos = TYPE_FIELD_BITPOS (type, 2) / TARGET_CHAR_BIT; /* FIXME: how can I detect wide chars in GPC ?? */ - char_type = TYPE_FIELD_TYPE (type,2); - if (char_size && TYPE_CODE (char_type) == TYPE_CODE_ARRAY) + if (char_type) { - *char_size = TYPE_LENGTH (TYPE_TARGET_TYPE (char_type)); + *char_type = TYPE_TARGET_TYPE (TYPE_FIELD_TYPE (type, 2)); + if (TYPE_CODE (*char_type) == TYPE_CODE_ARRAY) + *char_type = TYPE_TARGET_TYPE (*char_type); } - else if (char_size) - *char_size = 1; if (arrayname) *arrayname = TYPE_FIELDS (type)[2].name; return 3; @@ -182,14 +181,15 @@ pascal_one_char (int c, struct ui_file *stream, int *in_quotes) } } -static void pascal_emit_char (int c, struct ui_file *stream, int quoter); +static void pascal_emit_char (int c, struct type *type, + struct ui_file *stream, int quoter); /* Print the character C on STREAM as part of the contents of a literal string whose delimiter is QUOTER. 
Note that that format for printing characters and strings is language specific. */ static void -pascal_emit_char (int c, struct ui_file *stream, int quoter) +pascal_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { int in_quotes = 0; pascal_one_char (c, stream, &in_quotes); @@ -198,7 +198,7 @@ pascal_emit_char (int c, struct ui_file *stream, int quoter) } void -pascal_printchar (int c, struct ui_file *stream) +pascal_printchar (int c, struct type *type, struct ui_file *stream) { int in_quotes = 0; pascal_one_char (c, stream, &in_quotes); @@ -212,14 +212,16 @@ pascal_printchar (int c, struct ui_file *stream) had to stop before printing LENGTH characters, or if FORCE_ELLIPSES. */ void -pascal_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +pascal_printstr (struct ui_file *stream, struct type *type, + const gdb_byte *string, unsigned int length, + int force_ellipses, const struct value_print_options *options) { unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); /* If the string was not truncated due to `set print elements', and the last byte of it is a null, we don't print that, in traditional C @@ -273,7 +275,7 @@ pascal_printstr (struct ui_file *stream, const gdb_byte *string, fputs_filtered ("', ", stream); in_quotes = 0; } - pascal_printchar (current_char, stream); + pascal_printchar (current_char, type, stream); fprintf_filtered (stream, " <repeats %u times>", reps); i = rep1 - 1; things_printed += options->repeat_count_threshold; diff --git a/gdb/p-lang.h b/gdb/p-lang.h index 09a4569..2b2eb2d 100644 --- a/gdb/p-lang.h +++ b/gdb/p-lang.h @@ -48,12 +48,13 @@ extern void pascal_type_print_method_args (char *, char *, /* These are in p-lang.c: */ extern int - is_pascal_string_type (struct type *, int *, int *, int *, int *, char **); + is_pascal_string_type (struct type *, int *, int *, int *, + struct type 
**, char **); -extern void pascal_printchar (int, struct ui_file *); +extern void pascal_printchar (int, struct type *, struct ui_file *); -extern void pascal_printstr (struct ui_file *, const gdb_byte *, - unsigned int, int, int, +extern void pascal_printstr (struct ui_file *, struct type *, const gdb_byte *, + unsigned int, int, const struct value_print_options *); extern struct type **const (pascal_builtin_types[]); diff --git a/gdb/p-valprint.c b/gdb/p-valprint.c index 27ae619..2d1cb08 100644 --- a/gdb/p-valprint.c +++ b/gdb/p-valprint.c @@ -61,7 +61,7 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, struct type *elttype; unsigned eltlen; int length_pos, length_size, string_pos; - int char_size; + struct type *char_type; LONGEST val; CORE_ADDR addr; @@ -100,8 +100,9 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, len = temp_len; } - LA_PRINT_STRING (stream, valaddr + embedded_offset, len, - eltlen, 0, options); + LA_PRINT_STRING (stream, TYPE_TARGET_TYPE (type), + valaddr + embedded_offset, len, 0, + options); i = len; } else @@ -175,8 +176,7 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, && addr != 0) { /* no wide string yet */ - i = val_print_string (addr, -1, TYPE_LENGTH (elttype), stream, - options); + i = val_print_string (elttype, addr, -1, stream, options); } /* also for pointers to pascal strings */ /* Note: this is Free Pascal specific: @@ -184,7 +184,7 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, Pascal strings are mapped to records with lowercase names PM */ if (is_pascal_string_type (elttype, &length_pos, &length_size, - &string_pos, &char_size, NULL) + &string_pos, &char_type, NULL) && addr != 0) { ULONGEST string_length; @@ -193,7 +193,7 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, read_memory (addr + length_pos, buffer, length_size); string_length = extract_unsigned_integer (buffer, length_size); xfree (buffer); - i = val_print_string (addr + string_pos, 
string_length, char_size, stream, options); + i = val_print_string (char_type ,addr + string_pos, string_length, stream, options); } else if (pascal_object_is_vtbl_member (type)) { @@ -298,10 +298,10 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, else { if (is_pascal_string_type (type, &length_pos, &length_size, - &string_pos, &char_size, NULL)) + &string_pos, &char_type, NULL)) { len = extract_unsigned_integer (valaddr + embedded_offset + length_pos, length_size); - LA_PRINT_STRING (stream, valaddr + embedded_offset + string_pos, len, char_size, 0, options); + LA_PRINT_STRING (stream, char_type, valaddr + embedded_offset + string_pos, len, 0, options); } else pascal_object_print_value_fields (type, valaddr + embedded_offset, address, stream, @@ -426,7 +426,7 @@ pascal_val_print (struct type *type, const gdb_byte *valaddr, else fprintf_filtered (stream, "%d", (int) val); fputs_filtered (" ", stream); - LA_PRINT_CHAR ((unsigned char) val, stream); + LA_PRINT_CHAR ((unsigned char) val, type, stream); } break; diff --git a/gdb/parse.c b/gdb/parse.c index eee1f8e..8bb6404 100644 --- a/gdb/parse.c +++ b/gdb/parse.c @@ -352,6 +352,65 @@ write_exp_string (struct stoken str) write_exp_elt_longcst ((LONGEST) len); } +/* Add a vector of string constants to the end of the expression. + + This adds an OP_STRING operation, but encodes the contents + differently from write_exp_string. The language is expected to + handle evaluation of this expression itself. + + After the usual OP_STRING header, TYPE is written into the + expression as a long constant. The interpretation of this field is + up to the language evaluator. + + Next, each string in VEC is written. The length is written as a + long constant, followed by the contents of the string. */ + +void +write_exp_string_vector (int type, struct stoken_vector *vec) +{ + int i, n_slots, len; + + /* Compute the size. We compute the size in number of slots to + avoid issues with string padding. 
*/ + n_slots = 0; + for (i = 0; i < vec->len; ++i) + { + /* One slot for the length of this element, plus the number of + slots needed for this string. */ + n_slots += 1 + BYTES_TO_EXP_ELEM (vec->tokens[i].length); + } + + /* One more slot for the type of the string. */ + ++n_slots; + + /* Now compute a phony string length. */ + len = EXP_ELEM_TO_BYTES (n_slots) - 1; + + n_slots += 4; + if ((expout_ptr + n_slots) >= expout_size) + { + expout_size = max (expout_size * 2, expout_ptr + n_slots + 10); + expout = (struct expression *) + xrealloc ((char *) expout, (sizeof (struct expression) + + EXP_ELEM_TO_BYTES (expout_size))); + } + + write_exp_elt_opcode (OP_STRING); + write_exp_elt_longcst (len); + write_exp_elt_longcst (type); + + for (i = 0; i < vec->len; ++i) + { + write_exp_elt_longcst (vec->tokens[i].length); + memcpy (&expout->elts[expout_ptr], vec->tokens[i].ptr, + vec->tokens[i].length); + expout_ptr += BYTES_TO_EXP_ELEM (vec->tokens[i].length); + } + + write_exp_elt_longcst (len); + write_exp_elt_opcode (OP_STRING); +} + /* Add a bitstring constant to the end of the expression. Bitstring constants are stored by first writing an expression element diff --git a/gdb/parser-defs.h b/gdb/parser-defs.h index 2c4b755..cbda9c3 100644 --- a/gdb/parser-defs.h +++ b/gdb/parser-defs.h @@ -69,6 +69,22 @@ struct stoken int length; }; +struct typed_stoken + { + /* A language-specific type field. 
*/ + int type; + /* Pointer to first byte of char-string or first bit of bit-string */ + char *ptr; + /* Length of string in bytes for char-string or bits for bit-string */ + int length; + }; + +struct stoken_vector + { + int len; + struct typed_stoken *tokens; + }; + struct ttype { struct stoken stoken; @@ -130,6 +146,8 @@ extern void write_exp_elt_intern (struct internalvar *); extern void write_exp_string (struct stoken); +void write_exp_string_vector (int type, struct stoken_vector *vec); + extern void write_exp_bitstring (struct stoken); extern void write_exp_elt_block (struct block *); diff --git a/gdb/printcmd.c b/gdb/printcmd.c index 5268b1d..750244e 100644 --- a/gdb/printcmd.c +++ b/gdb/printcmd.c @@ -48,6 +48,7 @@ #include "solist.h" #include "solib.h" #include "parser-defs.h" +#include "charset.h" #ifdef TUI #include "tui/tui.h" /* For tui_active et.al. */ @@ -276,10 +277,13 @@ print_formatted (struct value *val, int size, switch (options->format) { case 's': - /* FIXME: Need to handle wchar_t's here... 
*/ - next_address = VALUE_ADDRESS (val) - + val_print_string (VALUE_ADDRESS (val), -1, 1, stream, - options); + { + struct type *elttype = value_type (val); + next_address = (VALUE_ADDRESS (val) + + val_print_string (elttype, + VALUE_ADDRESS (val), -1, + stream, options)); + } return; case 'i': @@ -374,7 +378,7 @@ print_scalar_formatted (const void *valaddr, struct type *type, print_hex_chars (stream, valaddr, len, byte_order); return; case 'c': - print_char_chars (stream, valaddr, len, byte_order); + print_char_chars (stream, type, valaddr, len, byte_order); return; default: break; @@ -1958,7 +1962,8 @@ printf_command (char *arg, int from_tty) enum argclass { - int_arg, long_arg, long_long_arg, ptr_arg, string_arg, + int_arg, long_arg, long_long_arg, ptr_arg, + string_arg, wide_string_arg, wide_char_arg, double_arg, long_double_arg, decfloat_arg }; enum argclass *argclass; @@ -2090,8 +2095,8 @@ printf_command (char *arg, int from_tty) break; case 'c': - this_argclass = int_arg; - if (lcount || seen_h || seen_big_l) + this_argclass = lcount == 0 ? int_arg : wide_char_arg; + if (lcount > 1 || seen_h || seen_big_l) bad = 1; if (seen_prec || seen_zero || seen_space || seen_plus) bad = 1; @@ -2106,8 +2111,8 @@ printf_command (char *arg, int from_tty) break; case 's': - this_argclass = string_arg; - if (lcount || seen_h || seen_big_l) + this_argclass = lcount == 0 ? string_arg : wide_string_arg; + if (lcount > 1 || seen_h || seen_big_l) bad = 1; if (seen_zero || seen_space || seen_plus) bad = 1; @@ -2159,6 +2164,15 @@ printf_command (char *arg, int from_tty) last_arg[length_before_ll + lcount]; current_substring += length_before_ll + 4; } + else if (this_argclass == wide_string_arg + || this_argclass == wide_char_arg) + { + /* Convert %ls or %lc to %s. 
*/ + int length_before_ls = f - last_arg - 2; + strncpy (current_substring, last_arg, length_before_ls); + strcpy (current_substring + length_before_ls, "s"); + current_substring += length_before_ls + 2; + } else { strncpy (current_substring, last_arg, f - last_arg); @@ -2223,6 +2237,76 @@ printf_command (char *arg, int from_tty) printf_filtered (current_substring, (char *) str); } break; + case wide_string_arg: + { + gdb_byte *str; + CORE_ADDR tem; + int j; + struct type *wctype = lookup_typename ("wchar_t", NULL, 0); + int wcwidth = TYPE_LENGTH (wctype); + gdb_byte *buf = alloca (wcwidth); + struct obstack output; + struct cleanup *inner_cleanup; + + tem = value_as_address (val_args[i]); + + /* This is a %s argument. Find the length of the string. */ + for (j = 0;; j += wcwidth) + { + QUIT; + read_memory (tem + j, buf, wcwidth); + if (extract_unsigned_integer (buf, wcwidth) == 0) + break; + } + + /* Copy the string contents into a string inside GDB. */ + str = (gdb_byte *) alloca (j + wcwidth); + if (j != 0) + read_memory (tem, str, j); + memset (&str[j], 0, wcwidth); + + obstack_init (&output); + inner_cleanup = make_cleanup_obstack_free (&output); + + convert_between_encodings (target_wide_charset (), + host_charset (), + str, j, wcwidth, + &output, translit_char); + obstack_grow_str0 (&output, ""); + + printf_filtered (current_substring, obstack_base (&output)); + do_cleanups (inner_cleanup); + } + break; + case wide_char_arg: + { + struct type *wctype = lookup_typename ("wchar_t", NULL, 0); + struct type *valtype; + struct obstack output; + struct cleanup *inner_cleanup; + const gdb_byte *bytes; + + valtype = value_type (val_args[i]); + if (TYPE_LENGTH (valtype) != TYPE_LENGTH (wctype) + || TYPE_CODE (valtype) != TYPE_CODE_INT) + error (_("expected wchar_t argument for %%lc")); + + bytes = value_contents (val_args[i]); + + obstack_init (&output); + inner_cleanup = make_cleanup_obstack_free (&output); + + convert_between_encodings (target_wide_charset (), + 
host_charset (), + bytes, TYPE_LENGTH (valtype), + TYPE_LENGTH (valtype), + &output, translit_char); + obstack_grow_str0 (&output, ""); + + printf_filtered (current_substring, obstack_base (&output)); + do_cleanups (inner_cleanup); + } + break; case double_arg: { struct type *type = value_type (val_args[i]); diff --git a/gdb/scm-lang.c b/gdb/scm-lang.c index 345befd..e2568c8 100644 --- a/gdb/scm-lang.c +++ b/gdb/scm-lang.c @@ -43,14 +43,14 @@ static int in_eval_c (void); struct type *builtin_type_scm; void -scm_printchar (int c, struct ui_file *stream) +scm_printchar (int c, struct type *type, struct ui_file *stream) { fprintf_filtered (stream, "#\\%c", c); } static void -scm_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +scm_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { fprintf_filtered (stream, "\"%s\"", string); diff --git a/gdb/scm-lang.h b/gdb/scm-lang.h index 6bf88f5..1798b2f 100644 --- a/gdb/scm-lang.h +++ b/gdb/scm-lang.h @@ -59,7 +59,7 @@ extern void scm_scmval_print (LONGEST, struct ui_file *, int, extern int is_scmvalue_type (struct type *); -extern void scm_printchar (int, struct ui_file *); +extern void scm_printchar (int, struct type *, struct ui_file *); extern struct value *scm_evaluate_string (char *, int); diff --git a/gdb/scm-valprint.c b/gdb/scm-valprint.c index f0a7642..a32add5 100644 --- a/gdb/scm-valprint.c +++ b/gdb/scm-valprint.c @@ -187,7 +187,8 @@ taloop: if (SCM_ICHRP (svalue)) { svalue = SCM_ICHR (svalue); - scm_printchar (svalue, stream); + scm_printchar (svalue, builtin_type (current_gdbarch)->builtin_char, + stream); break; } else if (SCM_IFLAGP (svalue) diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index a409c4d..a2eddf1 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,28 @@ +2009-03-20 Tom Tromey 
<tromey@redhat.com> + + * gdb.base/store.exp: Update for change to escape output. + * gdb.base/callfuncs.exp (fetch_all_registers): Update for change + to escape output. + * gdb.base/pointers.exp: Update for change to escape output. + * gdb.base/long_long.exp (gdb_test_long_long): Update for change + to escape output. + * gdb.base/constvars.exp (do_constvar_tests): Update for change to + escape output. + * gdb.base/call-rt-st.exp (print_struct_call): Update for change + to escape output. + * gdb.cp/ref-types.exp (gdb_start_again): Update for change to + escape output. + * gdb.base/setvar.exp: Update for change to escape output. + * lib/gdb.exp (default_gdb_start): Set LC_CTYPE to C. + * gdb.base/printcmds.exp (test_print_all_chars): Update for change + to escape output. + (test_print_string_constants): Likewise. + * gdb.base/charset.exp (valid_host_charset): Check size of + wchar_t. Handle UCS-2 and UCS-4. Add tests for wide and unicode + cases. Handle "auto"-related output. + * gdb.base/charset.c (char16_t, char32_t): New typedefs. + (uvar, Uvar): New globals. 
+ 2009-03-19 Jerome Guitton <guitton@adacore.com> * gdb.ada/fixed_points/fixed_points.adb: Add a test on overprecise diff --git a/gdb/testsuite/gdb.base/call-rt-st.exp b/gdb/testsuite/gdb.base/call-rt-st.exp index 3359c70..f73dd7f 100644 --- a/gdb/testsuite/gdb.base/call-rt-st.exp +++ b/gdb/testsuite/gdb.base/call-rt-st.exp @@ -186,7 +186,7 @@ if {![gdb_skip_float_test "print print_two_floats(*f3)"] && \ if ![gdb_skip_stdio_test "print print_bit_flags_char(*cflags)"] { print_struct_call "print_bit_flags_char(*cflags)" \ - ".*alpha\[ \r\n\]+gamma\[ \r\n\]+epsilon\[ \r\n\]+.\[0-9\]+ = \\{alpha = 1 '\\\\001', beta = 0 '\\\\0', gamma = 1 '\\\\001', delta = 0 '\\\\0', epsilon = 1 '\\\\001', omega = 0 '\\\\0'\\}" + ".*alpha\[ \r\n\]+gamma\[ \r\n\]+epsilon\[ \r\n\]+.\[0-9\]+ = \\{alpha = 1 '\\\\1', beta = 0 '\\\\0', gamma = 1 '\\\\1', delta = 0 '\\\\0', epsilon = 1 '\\\\1', omega = 0 '\\\\0'\\}" } if ![gdb_skip_stdio_test "print print_bit_flags_short(*sflags)"] { diff --git a/gdb/testsuite/gdb.base/callfuncs.exp b/gdb/testsuite/gdb.base/callfuncs.exp index 6d8aa45..be6a872 100644 --- a/gdb/testsuite/gdb.base/callfuncs.exp +++ b/gdb/testsuite/gdb.base/callfuncs.exp @@ -437,7 +437,7 @@ gdb_test "print t_small_values(1,3,5,7,9,11,13,15,17,19)" \ "The program being debugged stopped while.*" \ "stop at nested call level 4" gdb_test "backtrace" \ - "\#0 t_small_values \\(arg1=1 '.001', arg2=3, arg3=5, arg4=7 '.a', arg5=9, arg6=11 '.v', arg7=13, arg8=15, arg9=17, arg10=19\\).*\#2 sum10 \\(i0=2, i1=4, i2=6, i3=8, i4=10, i5=12, i6=14, i7=16, i8=18, i9=20\\).*\#3 <function called from gdb>.*\#4 add \\(a=4, b=5\\).*\#5 <function called from gdb>.*\#6 add \\(a=2, b=3\\).*\#7 <function called from gdb>.*\#8 main.*" \ + "\#0 t_small_values \\(arg1=1 '.1', arg2=3, arg3=5, arg4=7 '.a', arg5=9, arg6=11 '.v', arg7=13, arg8=15, arg9=17, arg10=19\\).*\#2 sum10 \\(i0=2, i1=4, i2=6, i3=8, i4=10, i5=12, i6=14, i7=16, i8=18, i9=20\\).*\#3 <function called from gdb>.*\#4 add \\(a=4, b=5\\).*\#5 
<function called from gdb>.*\#6 add \\(a=2, b=3\\).*\#7 <function called from gdb>.*\#8 main.*" \ "backtrace at nested call level 4" gdb_test "finish" "Value returned is .* = 100" \ "Finish from nested call level 4" diff --git a/gdb/testsuite/gdb.base/charset.c b/gdb/testsuite/gdb.base/charset.c index b640702..55a50ce 100644 --- a/gdb/testsuite/gdb.base/charset.c +++ b/gdb/testsuite/gdb.base/charset.c @@ -20,11 +20,6 @@ Please email any bugs, comments, and/or additions to this file to: bug-gdb@gnu.org */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - - /* X_string is a null-terminated string in the X charset whose elements are as follows. X should be the name the `set charset' command uses for the character set, in lower-case, with any @@ -54,6 +49,21 @@ char iso_8859_1_string[NUM_CHARS]; char ebcdic_us_string[NUM_CHARS]; char ibm1047_string[NUM_CHARS]; +/* We make a phony wchar_t and then pretend that this platform uses + UCS-4 (or UCS-2, depending on the size -- same difference for the + purposes of this test). */ +typedef unsigned int wchar_t; +wchar_t ucs_4_string[NUM_CHARS]; + +/* We also define a couple phony types for testing the u'' and U'' + support. It is ok if these have the wrong size on some platforms + -- the test case will skip the tests in that case. */ +typedef unsigned short char16_t; +typedef unsigned int char32_t; + +/* Make sure to use the typedefs. 
*/ +char16_t uvar; +char32_t Uvar; void init_string (char string[], @@ -62,7 +72,10 @@ init_string (char string[], char line_feed, char carriage_return, char horizontal_tab, char vertical_tab, char cent, char misc_ctrl) { - memset (string, x, NUM_CHARS); + int i; + + for (i = 0; i < NUM_CHARS; ++i) + string[i] = x; string[0] = alert; string[1] = backspace; string[2] = form_feed; @@ -85,13 +98,21 @@ fill_run (char string[], int start, int len, int first) } +void +init_ucs4 () +{ + int i; + + for (i = 0; i < NUM_CHARS; ++i) + ucs_4_string[i] = iso_8859_1_string[i] & 0xff; +} + int main () { #ifdef usestubs set_debug_traps(); breakpoint(); #endif - (void) malloc (1); /* Initialize ascii_string. */ init_string (ascii_string, 120, @@ -146,5 +167,7 @@ int main () /* The digits, at least, are contiguous. */ fill_run (ibm1047_string, 59, 10, 240); - puts ("All set!"); /* all strings initialized */ + init_ucs4 (); + + return 0; /* all strings initialized */ } diff --git a/gdb/testsuite/gdb.base/charset.exp b/gdb/testsuite/gdb.base/charset.exp index fa26521..a6368c9 100644 --- a/gdb/testsuite/gdb.base/charset.exp +++ b/gdb/testsuite/gdb.base/charset.exp @@ -47,13 +47,7 @@ proc parse_show_charset_output {testname} { global gdb_prompt gdb_expect { - -re "The current host and target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" { - set host_charset $expect_out(1,string) - set target_charset $expect_out(1,string) - set retlist [list $host_charset $target_charset] - pass $testname - } - -re "The current host character set is `(.*)'\\.\[\r\n\]+The current target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" { + -re "The host character set is \"(.*)\"\\.\[\r\n\]+The target character set is \"(.*)\"\\.\[\r\n\]+The target wide character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" { set host_charset $expect_out(1,string) set target_charset $expect_out(2,string) set retlist [list $host_charset $target_charset] @@ -81,76 +75,31 @@ proc parse_show_charset_output {testname} { } -# 
Try the various `show charset' commands. These are all aliases of each -# other; `show target-charset' and `show host-charset' actually print -# both the host and target charsets. +# Try the various `show charset' commands. send_gdb "show charset\n" set show_charset [parse_show_charset_output "show charset"] send_gdb "show target-charset\n" -set show_target_charset [parse_show_charset_output "show target-charset"] +set show_target_charset \ + [lindex [parse_show_charset_output "show target-charset"] 0] -if {[lsearch $show_charset $show_target_charset] >= 0} { +if {[lsearch -exact $show_charset $show_target_charset] >= 0} { pass "check `show target-charset' against `show charset'" } else { fail "check `show target-charset' against `show charset'" } send_gdb "show host-charset\n" -set show_host_charset [parse_show_charset_output "show host-charset"] +set show_host_charset \ + [lindex [parse_show_charset_output "show host-charset"] 0] -if {[lsearch $show_charset $show_host_charset] >= 0} { +if {[lsearch -exact $show_charset $show_host_charset] >= 0} { pass "check `show host-charset' against `show charset'" } else { fail "check `show host-charset' against `show charset'" } - -# Get the list of supported (host) charsets as possible completions. -send_gdb "set charset \t\t" - -# Check that we can at least use ASCII as a host character set. -sleep 1 -gdb_expect { - -re "^set charset .*\r\nASCII.*\r\n$gdb_prompt set charset " { - # We got the output that we wanted, including ASCII as possible - # charset. Send a newline to get us back to the prompt. This will - # also generate an error message. Let's not check here that the error - # message makes sense, we do that below, as a separate testcase. 
- send_gdb "\n" - gdb_expect { - -re ".*Requires an argument.*$gdb_prompt $" { - pass "get valid character sets" - } - -re ".*$gdb_prompt $" { - send_gdb "\n" - gdb_expect { - -re ".*$gdb_prompt $" { - fail "get valid character sets" - } - } - } - timeout { - fail "(timeout) get valid character sets" - } - } - } - -re ".*$gdb_prompt $" { - # We got some output that ended with a regular prompt - fail "get valid character sets" - } - -re ".*$gdb_prompt set charset.*$" { - # We got some other output, send a cntrl-c to gdb to get us back - # to the prompt. - send_gdb "\003" - fail "get valid character sets" - } - timeout { - fail "get valid character sets (timeout)" - } -} - # Try a malformed `set charset'. gdb_test "set charset" \ "Requires an argument. Valid arguments are.*" \ @@ -179,7 +128,7 @@ proc all_charset_names {} { proc valid_host_charset {charset} { global charsets - return $charsets($charset) + return [expr {[info exists charsets($charset)] && $charsets($charset)}] } send_gdb "set host-charset\n" @@ -211,6 +160,13 @@ gdb_expect { } } +# If gdb was built with a phony iconv, it will only have two character +# sets: "auto" and the default. In this situation, this set of tests +# is pointless. +if {[llength [array names charsets]] < 3} { + untested charset.exp + return -1 +} send_gdb "set target-charset\n" gdb_expect { @@ -244,8 +200,10 @@ gdb_expect { } } -# Make sure that GDB supports every host/target charset combination. -foreach host_charset [all_charset_names] { +# We don't want to test all the charset names here, since that would +# be too many combinations. We pick a subset. +set charset_subset {ASCII ISO-8859-1 EBCDIC-US IBM1047} +foreach host_charset $charset_subset { if {[valid_host_charset $host_charset]} { set testname "try `set host-charset $host_charset'" @@ -279,7 +237,7 @@ foreach host_charset [all_charset_names] { # Now try setting every possible target character set, # given that host charset. 
- foreach target_charset [all_charset_names] { + foreach target_charset $charset_subset { set testname "try `set target-charset $target_charset'" send_gdb "set target-charset $target_charset\n" gdb_expect { @@ -404,23 +362,42 @@ gdb_expect { } +# We only try the wide character tests on machines where the wchar_t +# typedef in the test case has the right size. +set wchar_size [get_sizeof wchar_t 99] +set wchar_ok 0 +if {$wchar_size == 2} { + lappend charset_subset UCS-2 + set wchar_ok 1 +} elseif {$wchar_size == 4} { + lappend charset_subset UCS-4 + set wchar_ok 1 +} + gdb_test "set host-charset ASCII" "" -foreach target_charset [all_charset_names] { - send_gdb "set target-charset $target_charset\n" +foreach target_charset $charset_subset { + if {$target_charset == "UCS-4" || $target_charset == "UCS-2"} { + set param target-wide-charset + set L L + } else { + set param target-charset + set L "" + } + send_gdb "set $param $target_charset\n" gdb_expect { -re "$gdb_prompt $" { - pass "set target-charset $target_charset" + pass "set $param $target_charset" } timeout { - fail "set target-charset $target_charset (timeout)" + fail "set $param $target_charset (timeout)" } } # Try printing the null character. There seems to be a bug in # gdb_test that requires us to use gdb_expect here. - send_gdb "print '\\0'\n" + send_gdb "print $L'\\0'\n" gdb_expect { - -re "\\\$${decimal} = 0 '\\\\0'\[\r\n\]+$gdb_prompt $" { + -re "\\\$${decimal} = 0 $L'\\\\0'\[\r\n\]+$gdb_prompt $" { pass "print the null character in ${target_charset}" } -re "$gdb_prompt $" { @@ -435,8 +412,14 @@ foreach target_charset [all_charset_names] { # a string in $target_charset. The variable's name is the # character set's name, in lower-case, with all non-identifier # characters replaced with '_', with "_string" stuck on the end. 
- set var_name [string tolower "${target_charset}_string"] - regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name + if {$target_charset == "UCS-2"} { + # We still use the ucs_4_string variable -- but the size is + # correct for UCS-2. + set var_name ucs_4_string + } else { + set var_name [string tolower "${target_charset}_string"] + regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name + } # Compute a regexp matching the results we expect. This is static, # but it's easier than writing it out. @@ -444,12 +427,12 @@ foreach target_charset [all_charset_names] { set uppercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ" set lowercase "abcdefghijklmnopqrstuvwxyz" set digits "0123456789" - set octal_escape "\\\\\[0-9\]\[0-9\]\[0-9\]" + set octal_escape "\\\\\[0-9\]+" send_gdb "print $var_name\n" # ${escapes}${uppercase}${lowercase}${digits}${octal}${octal} gdb_expect { - -re ".* = \"(\\\\a|x)(\\\\b|x)(\\\\f|x)(\\\\n|x)(\\\\r|x)(\\\\t|x)(\\\\v|x)${uppercase}${lowercase}${digits}(\\\\\[0-9\]\[0-9\]\[0-9\]|x)(\\\\\[0-9\]\[0-9\]\[0-9\]|x).*\"\[\r\n\]+$gdb_prompt $" { + -re ".* = $L\"(\\\\a|x)(\\\\b|x)(\\\\f|x)(\\\\n|x)(\\\\r|x)(\\\\t|x)(\\\\v|x)${uppercase}${lowercase}${digits}(${octal_escape}|x)+\"\[\r\n\]+$gdb_prompt $" { pass "print string in $target_charset" } -re "$gdb_prompt $" { @@ -461,22 +444,22 @@ foreach target_charset [all_charset_names] { } # Try entering a character literal, and see if it comes back unchanged. - gdb_test "print 'A'" \ - " = \[0-9-\]+ 'A'" \ + gdb_test "print $L'A'" \ + " = \[0-9-\]+ $L'A'" \ "parse character literal in ${target_charset}" # Check that the character literal was encoded correctly. - gdb_test "print 'A' == $var_name\[7\]" \ + gdb_test "print $L'A' == $var_name\[7\]" \ " = 1" \ "check value of parsed character literal in ${target_charset}" # Try entering a string literal, and see if it comes back unchanged. 
- gdb_test "print \"abcdefABCDEF012345\"" \ - " = \"abcdefABCDEF012345\"" \ + gdb_test "print $L\"abcdefABCDEF012345\"" \ + " = $L\"abcdefABCDEF012345\"" \ "parse string literal in ${target_charset}" # Check that the string literal was encoded correctly. - gdb_test "print \"q\"\[0\] == $var_name\[49\]" \ + gdb_test "print $L\"q\"\[0\] == $var_name\[49\]" \ " = 1" \ "check value of parsed string literal in ${target_charset}" @@ -509,7 +492,7 @@ foreach target_charset [all_charset_names] { send_gdb "print $var_name\[$i\]\n" set have_escape 1 gdb_expect { - -re "= \[0-9-\]+ '\\\\${escape}'\[\r\n\]+$gdb_prompt $" { + -re "= \[0-9-\]+ $L'\\\\${escape}'\[\r\n\]+$gdb_prompt $" { pass "try printing '\\${escape}' in ${target_charset}" } -re "= \[0-9-\]+ 'x'\[\r\n\]+$gdb_prompt $" { @@ -527,12 +510,12 @@ foreach target_charset [all_charset_names] { if {$have_escape} { # Try parsing a backslash escape in a character literal. - gdb_test "print '\\${escape}' == $var_name\[$i\]" \ + gdb_test "print $L'\\${escape}' == $var_name\[$i\]" \ " = 1" \ "check value of '\\${escape}' in ${target_charset}" # Try parsing a backslash escape in a string literal. - gdb_test "print \"\\${escape}\"\[0\] == $var_name\[$i\]" \ + gdb_test "print $L\"\\${escape}\"\[0\] == $var_name\[$i\]" \ " = 1" \ "check value of \"\\${escape}\" in ${target_charset}" } @@ -540,10 +523,73 @@ foreach target_charset [all_charset_names] { # Try printing a character escape that doesn't exist. We should # get the unescaped character, in the target character set. - gdb_test "print '\\q'" " = \[0-9-\]+ 'q'" \ + gdb_test "print $L'\\q'" " = \[0-9-\]+ $L'q'" \ "print escape that doesn't exist in $target_charset" - gdb_test "print '\\q' == $var_name\[49\]" " = 1" \ + gdb_test "print $L'\\q' == $var_name\[49\]" " = 1" \ "check value of escape that doesn't exist in $target_charset" } +# Reset the target charset. +gdb_test "set target-charset UTF-8" "" + +# \242 is not a valid UTF-8 character. 
+gdb_test "print \"\\242\"" " = \"\\\\242\"" \ + "non-representable target character" + +gdb_test "print '\\x'" "\\\\x escape without a following hex digit." +gdb_test "print '\\u'" "\\\\u escape without a following hex digit." +gdb_test "print '\\9'" " = \[0-9\]+ '9'" + +# Tests for wide- or unicode- strings. L is the prefix letter to use, +# either "L" (for wide strings), "u" (for UCS-2), or "U" (for UCS-4). +# NAME is used in the test names and should be related to the prefix +# letter in some easy-to-understand way. +proc test_wide_or_unicode {L name} { + gdb_test "print $L\"ab\" $L\"c\"" " = $L\"abc\"" \ + "basic $name string concatenation" + gdb_test "print $L\"ab\" \"c\"" " = $L\"abc\"" \ + "narrow and $name string concatenation" + gdb_test "print \"ab\" $L\"c\"" " = $L\"abc\"" \ + "$name and narrow string concatenation" + gdb_test "print $L\"\\xe\" $L\"c\"" " = $L\"\\\\16c\"" \ + "$name string concatenation with escape" + gdb_test "print $L\"\" \"abcdef\" \"g\"" \ + "$L\"abcdefg\"" \ + "concatenate three strings with empty $name string" + + gdb_test "print $L'a'" "= \[0-9\]+ $L'a'" \ + "basic $name character" +} + +if {$wchar_ok} { + test_wide_or_unicode L wide +} + +set ucs2_ok [expr {[get_sizeof char16_t 99] == 2}] +if {$ucs2_ok} { + test_wide_or_unicode u UCS-2 +} + +set ucs4_ok [expr {[get_sizeof char32_t 99] == 4}] +if {$ucs4_ok} { + test_wide_or_unicode U UCS-4 +} + +# Test an invalid string combination. +proc test_combination {L1 name1 L2 name2} { + gdb_test "print $L1\"abc\" $L2\"def\"" \ + "Undefined string concatenation." 
\ + "undefined concatenation of $name1 and $name2" +} + +if {$wchar_ok && $ucs2_ok} { + test_combination L wide u UCS-2 +} +if {$wchar_ok && $ucs4_ok} { + test_combination L wide U UCS-4 +} +if {$ucs2_ok && $ucs4_ok} { + test_combination u UCS-2 U UCS-4 +} + gdb_exit diff --git a/gdb/testsuite/gdb.base/constvars.exp b/gdb/testsuite/gdb.base/constvars.exp index d53a826..6d1bd12 100644 --- a/gdb/testsuite/gdb.base/constvars.exp +++ b/gdb/testsuite/gdb.base/constvars.exp @@ -161,7 +161,7 @@ proc do_constvar_tests {} { gdb_test "print laconic" " = 65 'A'" local_compiler_xfail_check gdb_test "ptype laconic" "type = const char" - gdb_test "print laggard" " = 1 '.001'" + gdb_test "print laggard" " = 1 '.1'" local_compiler_xfail_check gdb_test "ptype laggard" "type = const unsigned char" gdb_test "print lagoon" " = 2" @@ -209,7 +209,7 @@ proc do_constvar_tests {} { gdb_test "print *lewd" " = 65 'A'" local_compiler_xfail_check gdb_test "ptype lewd" "type = const char \\* const" - gdb_test "print *lexicographer" " = 1 '.001'" + gdb_test "print *lexicographer" " = 1 '.1'" local_compiler_xfail_check gdb_test "ptype lexicographer" "type = const unsigned char \\* const" gdb_test "print *lexicon" " = 2" @@ -233,7 +233,7 @@ proc do_constvar_tests {} { gdb_test "print *languish" " = 65 'A'" local_compiler_xfail_check gdb_test "ptype languish" "type = const char \\*" - gdb_test "print *languor" " = 1 '.001'" + gdb_test "print *languor" " = 1 '.1'" local_compiler_xfail_check gdb_test "ptype languor" "type = const unsigned char \\*" gdb_test "print *lank" " = 2" diff --git a/gdb/testsuite/gdb.base/long_long.exp b/gdb/testsuite/gdb.base/long_long.exp index d0ad5ba..5189324 100644 --- a/gdb/testsuite/gdb.base/long_long.exp +++ b/gdb/testsuite/gdb.base/long_long.exp @@ -210,7 +210,7 @@ gdb_test_char "p/o *(char *)c" "01" gdb_test_char "p/t *(char *)c" "1" gdb_test_char "p/a *(char *)c" "0x1( <.*>)?" 
gdb_test_char "p/f *(char *)c" "1" -gdb_test_char "p/c *(char *)c" "1 '.001'" +gdb_test_char "p/c *(char *)c" "1 '.1'" gdb_test_short "p/x *(short *)s" "" "0x123" "" gdb_test_short "p/d *(short *)s" "" "291" "" @@ -257,7 +257,7 @@ gdb_test "x/u w" "19088743" gdb_test "x/o w" "0110642547" gdb_test "x/t w" "00000001001000110100010101100111" gdb_test_xptr "x/a" { b "" } { h "" } { w "0x1234567" } { g "0x123456789abcdef" } -gdb_test "x/c b" "1 '.001'" +gdb_test "x/c b" "1 '.1'" if { $sizeof_double == 8 || $sizeof_long_double == 8 } { gdb_test "x/f &val.oct" "-5.9822653797615723e-120" } else { @@ -273,7 +273,7 @@ gdb_test "x/2u g" "81985529216486895.*12046818088235383159" gdb_test "x/2o g" "04432126361152746757.*01234567123456701234567" gdb_test "x/2t g" "0000000100100011010001010110011110001001101010111100110111101111.*1010011100101110111001010011100101110111000001010011100101110111" gdb_test_xptr "x/2a" { b "" } { h "" } { w "0x1234567.*0xa72ee539" } { g "0x123456789abcdef.*0xa72ee53977053977" } -gdb_test "x/2c b" "1 '.001'.*-89 '.'" +gdb_test "x/2c b" "1 '.1'.*-89 '.\[0-9\]*'" if { $sizeof_double == 8 || $sizeof_long_double == 8 } { gdb_test "x/2f &val.oct" "-5.9822653797615723e-120.*-5.9041889495880968e-100" } else { @@ -288,7 +288,7 @@ gdb_test "x/2bu b" "1.*167" gdb_test "x/2bo b" "01.*0247" gdb_test "x/2bt b" "00000001.*10100111" gdb_test_ptr "x/2ba b" "" "" "0x1.*0xffffffa7" "0x1.*0xffffffffffffffa7" -gdb_test "x/2bc b" "1 '.001'.*-89 '.'" +gdb_test "x/2bc b" "1 '.1'.*-89 '.\[0-9\]*'" gdb_test "x/2bf b" "1.*-89" gdb_test "x/2hx h" "0x0123.*0xa72e" @@ -315,7 +315,7 @@ gdb_test "x/2gu g" "81985529216486895.*12046818088235383159" gdb_test "x/2go g" "04432126361152746757.*01234567123456701234567" gdb_test "x/2gt g" "0000000100100011010001010110011110001001101010111100110111101111.*1010011100101110111001010011100101110111000001010011100101110111" gdb_test_ptr "x/2ga g" "" "" "0x89abcdef.*0x77053977" "0x123456789abcdef.*0xa72ee53977053977" -gdb_test "x/2gc g" "-17 
'.'.*119 'w'" +gdb_test "x/2gc g" "-17 '.\[0-9\]*'.*119 'w'" gdb_test "x/2gf g" "3.5127005640885037e-303.*-5.9822653797615723e-120" gdb_exit diff --git a/gdb/testsuite/gdb.base/pointers.exp b/gdb/testsuite/gdb.base/pointers.exp index 91838a2..2d0a70e 100644 --- a/gdb/testsuite/gdb.base/pointers.exp +++ b/gdb/testsuite/gdb.base/pointers.exp @@ -389,7 +389,7 @@ gdb_expect { send_gdb "print *pUC\n" gdb_expect { - -re ".\[0-9\]* = 21 \'.025\'.*$gdb_prompt $" { + -re ".\[0-9\]* = 21 \'.25\'.*$gdb_prompt $" { pass "print value of *pUC" } -re ".*$gdb_prompt $" { fail "print value of *pUC" } diff --git a/gdb/testsuite/gdb.base/printcmds.exp b/gdb/testsuite/gdb.base/printcmds.exp index 1e17da4..b6f8a1f 100644 --- a/gdb/testsuite/gdb.base/printcmds.exp +++ b/gdb/testsuite/gdb.base/printcmds.exp @@ -137,12 +137,12 @@ proc test_print_all_chars {} { global gdb_prompt gdb_test "p ctable1\[0\]" " = 0 '\\\\0'" - gdb_test "p ctable1\[1\]" " = 1 '\\\\001'" - gdb_test "p ctable1\[2\]" " = 2 '\\\\002'" - gdb_test "p ctable1\[3\]" " = 3 '\\\\003'" - gdb_test "p ctable1\[4\]" " = 4 '\\\\004'" - gdb_test "p ctable1\[5\]" " = 5 '\\\\005'" - gdb_test "p ctable1\[6\]" " = 6 '\\\\006'" + gdb_test "p ctable1\[1\]" " = 1 '\\\\1'" + gdb_test "p ctable1\[2\]" " = 2 '\\\\2'" + gdb_test "p ctable1\[3\]" " = 3 '\\\\3'" + gdb_test "p ctable1\[4\]" " = 4 '\\\\4'" + gdb_test "p ctable1\[5\]" " = 5 '\\\\5'" + gdb_test "p ctable1\[6\]" " = 6 '\\\\6'" gdb_test "p ctable1\[7\]" " = 7 '\\\\a'" gdb_test "p ctable1\[8\]" " = 8 '\\\\b'" gdb_test "p ctable1\[9\]" " = 9 '\\\\t'" @@ -150,24 +150,24 @@ proc test_print_all_chars {} { gdb_test "p ctable1\[11\]" " = 11 '\\\\v'" gdb_test "p ctable1\[12\]" " = 12 '\\\\f'" gdb_test "p ctable1\[13\]" " = 13 '\\\\r'" - gdb_test "p ctable1\[14\]" " = 14 '\\\\016'" - gdb_test "p ctable1\[15\]" " = 15 '\\\\017'" - gdb_test "p ctable1\[16\]" " = 16 '\\\\020'" - gdb_test "p ctable1\[17\]" " = 17 '\\\\021'" - gdb_test "p ctable1\[18\]" " = 18 '\\\\022'" - gdb_test "p 
ctable1\[19\]" " = 19 '\\\\023'" - gdb_test "p ctable1\[20\]" " = 20 '\\\\024'" - gdb_test "p ctable1\[21\]" " = 21 '\\\\025'" - gdb_test "p ctable1\[22\]" " = 22 '\\\\026'" - gdb_test "p ctable1\[23\]" " = 23 '\\\\027'" - gdb_test "p ctable1\[24\]" " = 24 '\\\\030'" - gdb_test "p ctable1\[25\]" " = 25 '\\\\031'" - gdb_test "p ctable1\[26\]" " = 26 '\\\\032'" - gdb_test "p ctable1\[27\]" " = 27 '\\\\033'" - gdb_test "p ctable1\[28\]" " = 28 '\\\\034'" - gdb_test "p ctable1\[29\]" " = 29 '\\\\035'" - gdb_test "p ctable1\[30\]" " = 30 '\\\\036'" - gdb_test "p ctable1\[31\]" " = 31 '\\\\037'" + gdb_test "p ctable1\[14\]" " = 14 '\\\\16'" + gdb_test "p ctable1\[15\]" " = 15 '\\\\17'" + gdb_test "p ctable1\[16\]" " = 16 '\\\\20'" + gdb_test "p ctable1\[17\]" " = 17 '\\\\21'" + gdb_test "p ctable1\[18\]" " = 18 '\\\\22'" + gdb_test "p ctable1\[19\]" " = 19 '\\\\23'" + gdb_test "p ctable1\[20\]" " = 20 '\\\\24'" + gdb_test "p ctable1\[21\]" " = 21 '\\\\25'" + gdb_test "p ctable1\[22\]" " = 22 '\\\\26'" + gdb_test "p ctable1\[23\]" " = 23 '\\\\27'" + gdb_test "p ctable1\[24\]" " = 24 '\\\\30'" + gdb_test "p ctable1\[25\]" " = 25 '\\\\31'" + gdb_test "p ctable1\[26\]" " = 26 '\\\\32'" + gdb_test "p ctable1\[27\]" " = 27 '\\\\33'" + gdb_test "p ctable1\[28\]" " = 28 '\\\\34'" + gdb_test "p ctable1\[29\]" " = 29 '\\\\35'" + gdb_test "p ctable1\[30\]" " = 30 '\\\\36'" + gdb_test "p ctable1\[31\]" " = 31 '\\\\37'" gdb_test "p ctable1\[32\]" " = 32 ' '" gdb_test "p ctable1\[33\]" " = 33 '!'" gdb_test "p ctable1\[34\]" " = 34 '\"'" @@ -475,13 +475,13 @@ proc test_print_strings {} { gdb_test "p &ctable1\[0\]" \ " = \\(unsigned char \\*\\) \"\"" gdb_test "p &ctable1\[1\]" \ - " = \\(unsigned char \\*\\) \"\\\\001\\\\002\\\\003\\\\004\\\\005\\\\006\\\\a\\\\b\"..." + " = \\(unsigned char \\*\\) \"\\\\1\\\\2\\\\3\\\\4\\\\5\\\\6\\\\a\\\\b\"..." gdb_test "p &ctable1\[1*8\]" \ - " = \\(unsigned char \\*\\) \"\\\\b\\\\t\\\\n\\\\v\\\\f\\\\r\\\\016\\\\017\"..." 
+ " = \\(unsigned char \\*\\) \"\\\\b\\\\t\\\\n\\\\v\\\\f\\\\r\\\\16\\\\17\"..." gdb_test "p &ctable1\[2*8\]" \ - " = \\(unsigned char \\*\\) \"\\\\020\\\\021\\\\022\\\\023\\\\024\\\\025\\\\026\\\\027\"..." + " = \\(unsigned char \\*\\) \"\\\\20\\\\21\\\\22\\\\23\\\\24\\\\25\\\\26\\\\27\"..." gdb_test "p &ctable1\[3*8\]" \ - " = \\(unsigned char \\*\\) \"\\\\030\\\\031\\\\032\\\\033\\\\034\\\\035\\\\036\\\\037\"..." + " = \\(unsigned char \\*\\) \"\\\\30\\\\31\\\\32\\\\33\\\\34\\\\35\\\\36\\\\37\"..." gdb_test "p &ctable1\[4*8\]" \ " = \\(unsigned char \\*\\) \" !\\\\\"#\\\$%&'\"..." gdb_test "p &ctable1\[5*8\]" \ @@ -622,7 +622,7 @@ proc test_print_string_constants {} { set timeout 60; gdb_test "p \"a string\"" " = \"a string\"" - gdb_test "p \"embedded \\000 null\"" " = \"embedded \\\\000 null\"" + gdb_test "p \"embedded \\000 null\"" " = \"embedded \\\\0 null\"" gdb_test "p \"abcd\"\[2\]" " = 99 'c'" gdb_test "p sizeof (\"abcdef\")" " = 7" gdb_test "ptype \"foo\"" " = char \\\[4\\\]" diff --git a/gdb/testsuite/gdb.base/setvar.exp b/gdb/testsuite/gdb.base/setvar.exp index 2350a33..3be8424 100644 --- a/gdb/testsuite/gdb.base/setvar.exp +++ b/gdb/testsuite/gdb.base/setvar.exp @@ -121,7 +121,7 @@ proc test_set { args } { # test_set "set variable v_char=0" "print v_char" ".\[0-9\]* = 0 \'.0\'" "set variable char=0" -test_set "set variable v_char=1" "print v_char" ".\[0-9\]* = 1 \'.001\'" "set variable char=1" +test_set "set variable v_char=1" "print v_char" ".\[0-9\]* = 1 \'.1\'" "set variable char=1" test_set "set variable v_char=7" "print v_char" ".\[0-9\]* = 7 \'.a\'" "set variable char=7 (Bel)" test_set "set variable v_char=32" "print v_char" ".\[0-9\]* = 32 \' \'" "set variable char=32 (SPC)" test_set "set variable v_char=65" "print v_char" ".\[0-9\]* = 65 \'A\'" "set variable char=65 ('A')" @@ -132,7 +132,7 @@ test_set "set variable v_char=127" "print v_char" ".\[0-9\]* = 127 \'.177\'" # test "set variable" for type "signed char" # test_set "set variable 
v_char=0" "print v_signed_char" ".\[0-9\]* = 0 \'.0\'" "set variable signed char=0" -test_set "set variable v_signed_char=1" "print v_signed_char" ".\[0-9\]* = 1 \'.001\'" "set variable signed char=1" +test_set "set variable v_signed_char=1" "print v_signed_char" ".\[0-9\]* = 1 \'.1\'" "set variable signed char=1" test_set "set variable v_signed_char=7" "print v_signed_char" ".\[0-9\]* = 7 \'.a\'" "set variable signed char=7 (Bel)" test_set "set variable v_signed_char=32" "print v_signed_char" ".\[0-9\]* = 32 \' \'" "set variable signed char=32 (SPC)" test_set "set variable v_signed_char=65" "print v_signed_char" ".\[0-9\]* = 65 \'A\'" "set variable signed char=65 ('A')" @@ -151,7 +151,7 @@ gdb_test "print v_signed_char" ".\[0-9\]* = -1 \'.377\'" \ # test "set variable" for type "unsigned char" # test_set "set variable v_unsigned_char=0" "print v_unsigned_char" ".\[0-9\]* = 0 \'.0\'" "set variable unsigned char=0" -test_set "set variable v_unsigned_char=1" "print v_unsigned_char" ".\[0-9\]* = 1 \'.001\'" "set variable unsigned char=1" +test_set "set variable v_unsigned_char=1" "print v_unsigned_char" ".\[0-9\]* = 1 \'.1\'" "set variable unsigned char=1" test_set "set variable v_unsigned_char=7" "print v_unsigned_char" ".\[0-9\]* = 7 \'.a\'" "set variable unsigned char=7 (Bel)" test_set "set variable v_unsigned_char=32" "print v_unsigned_char" ".\[0-9\]* = 32 \' \'" "set variable unsigned char=32 (SPC)" test_set "set variable v_unsigned_char=65" "print v_unsigned_char" ".\[0-9\]* = 65 \'A\'" "set variable unsigned char=65 ('A')" diff --git a/gdb/testsuite/gdb.base/store.exp b/gdb/testsuite/gdb.base/store.exp index 963bb19..feab6bd 100644 --- a/gdb/testsuite/gdb.base/store.exp +++ b/gdb/testsuite/gdb.base/store.exp @@ -74,7 +74,7 @@ proc check_set { t l r new add } { "${prefix}; print incremented l, expecting ${add}" } -check_set "charest" "-1 .*" "-2 .*" "4 ..004." "2 ..002." +check_set "charest" "-1 .*" "-2 .*" "4 ..4." "2 ..2." 
check_set "short" "-1" "-2" "4" "2" check_set "int" "-1" "-2" "4" "2" check_set "long" "-1" "-2" "4" "2" @@ -102,7 +102,7 @@ proc up_set { t l r new } { "${prefix}; print new l, expecting ${new}" } -up_set "charest" "-1 .*" "-2 .*" "4 ..004." +up_set "charest" "-1 .*" "-2 .*" "4 ..4." up_set "short" "-1" "-2" "4" up_set "int" "-1" "-2" "4" up_set "long" "-1" "-2" "4" diff --git a/gdb/testsuite/gdb.cp/ref-types.exp b/gdb/testsuite/gdb.cp/ref-types.exp index 4784cb2..b2e55cf 100644 --- a/gdb/testsuite/gdb.cp/ref-types.exp +++ b/gdb/testsuite/gdb.cp/ref-types.exp @@ -284,7 +284,7 @@ gdb_expect { send_gdb "print UC\n" gdb_expect { - -re ".\[0-9\]* = 21 '\.025'\.*$gdb_prompt $" { + -re ".\[0-9\]* = 21 '\.25'\.*$gdb_prompt $" { pass "print value of UC" } -re ".*$gdb_prompt $" { fail "print value of UC" } @@ -557,7 +557,7 @@ gdb_expect { send_gdb "print rUC\n" gdb_expect { - -re ".\[0-9\]* = \\(unsigned char &\\) @$hex: 21 \'.025\'.*$gdb_prompt $" { + -re ".\[0-9\]* = \\(unsigned char &\\) @$hex: 21 \'.25\'.*$gdb_prompt $" { pass "print value of rUC" } -re ".*$gdb_prompt $" { fail "print value of rUC" } diff --git a/gdb/testsuite/lib/gdb.exp b/gdb/testsuite/lib/gdb.exp index 820ab20..8b8e7c6 100644 --- a/gdb/testsuite/lib/gdb.exp +++ b/gdb/testsuite/lib/gdb.exp @@ -1162,9 +1162,12 @@ proc default_gdb_start { } { global gdb_prompt global timeout global gdb_spawn_id; + global env gdb_stop_suppressing_tests; + set env(LC_CTYPE) C + verbose "Spawning $GDB $INTERNAL_GDBFLAGS $GDBFLAGS" if [info exists gdb_spawn_id] { diff --git a/gdb/typeprint.c b/gdb/typeprint.c index 1f824fa..8133ad0 100644 --- a/gdb/typeprint.c +++ b/gdb/typeprint.c @@ -236,7 +236,7 @@ print_type_scalar (struct type *type, LONGEST val, struct ui_file *stream) break; case TYPE_CODE_CHAR: - LA_PRINT_CHAR ((unsigned char) val, stream); + LA_PRINT_CHAR ((unsigned char) val, type, stream); break; case TYPE_CODE_BOOL: diff --git a/gdb/utils.c b/gdb/utils.c index 0becfd9..5338868 100644 --- a/gdb/utils.c +++ 
b/gdb/utils.c @@ -1571,21 +1571,33 @@ query (const char *ctlstr, ...) va_end (args); } -/* Print an error message saying that we couldn't make sense of a - \^mumble sequence in a string or character constant. START and END - indicate a substring of some larger string that contains the - erroneous backslash sequence, missing the initial backslash. */ -static NORETURN int -no_control_char_error (const char *start, const char *end) +/* A helper for parse_escape that converts a host character to a + target character. C is the host character. If conversion is + possible, then the target character is stored in *TARGET_C and the + function returns 1. Otherwise, the function returns 0. */ + +static int +host_char_to_target (int c, int *target_c) { - int len = end - start; - char *copy = alloca (end - start + 1); + struct obstack host_data; + char the_char = c; + struct cleanup *cleanups; + int result = 0; + + obstack_init (&host_data); + cleanups = make_cleanup_obstack_free (&host_data); - memcpy (copy, start, len); - copy[len] = '\0'; + convert_between_encodings (target_charset (), host_charset (), + &the_char, 1, 1, &host_data, translit_none); + + if (obstack_object_size (&host_data) == 1) + { + result = 1; + *target_c = *(char *) obstack_base (&host_data); + } - error (_("There is no control character `\\%s' in the `%s' character set."), - copy, target_charset ()); + do_cleanups (cleanups); + return result; } /* Parse a C escape sequence. STRING_PTR points to a variable @@ -1608,53 +1620,13 @@ parse_escape (char **string_ptr) { int target_char; int c = *(*string_ptr)++; - if (c_parse_backslash (c, &target_char)) - return target_char; - else - switch (c) - { + switch (c) + { case '\n': return -2; case 0: (*string_ptr)--; return 0; - case '^': - { - /* Remember where this escape sequence started, for reporting - errors. 
*/ - char *sequence_start_pos = *string_ptr - 1; - - c = *(*string_ptr)++; - - if (c == '?') - { - /* XXXCHARSET: What is `delete' in the host character set? */ - c = 0177; - - if (!host_char_to_target (c, &target_char)) - error (_("There is no character corresponding to `Delete' " - "in the target character set `%s'."), host_charset ()); - - return target_char; - } - else if (c == '\\') - target_char = parse_escape (string_ptr); - else - { - if (!host_char_to_target (c, &target_char)) - no_control_char_error (sequence_start_pos, *string_ptr); - } - - /* Now target_char is something like `c', and we want to find - its control-character equivalent. */ - if (!target_char_to_control_char (target_char, &target_char)) - no_control_char_error (sequence_start_pos, *string_ptr); - - return target_char; - } - - /* XXXCHARSET: we need to use isdigit and value-of-digit - methods of the host character set here. */ case '0': case '1': @@ -1665,16 +1637,16 @@ parse_escape (char **string_ptr) case '6': case '7': { - int i = c - '0'; + int i = host_hex_value (c); int count = 0; while (++count < 3) { c = (**string_ptr); - if (c >= '0' && c <= '7') + if (isdigit (c) && c != '8' && c != '9') { (*string_ptr)++; i *= 8; - i += c - '0'; + i += host_hex_value (c); } else { @@ -1683,14 +1655,39 @@ parse_escape (char **string_ptr) } return i; } - default: - if (!host_char_to_target (c, &target_char)) - error - ("The escape sequence `\%c' is equivalent to plain `%c', which" - " has no equivalent\n" "in the `%s' character set.", c, c, - target_charset ()); - return target_char; - } + + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + + default: + break; + } + + if (!host_char_to_target (c, &target_char)) + error + ("The escape sequence `\%c' is equivalent to plain `%c', which" + " has no equivalent\n" "in the 
`%s' character set.", c, c, + target_charset ()); + return target_char; } /* Print the character C on STREAM as part of the contents of a literal diff --git a/gdb/valops.c b/gdb/valops.c index 9810f2b..a5bef37 100644 --- a/gdb/valops.c +++ b/gdb/valops.c @@ -1352,6 +1352,24 @@ value_array (int lowbound, int highbound, struct value **elemvec) return val; } +struct value * +value_typed_string (char *ptr, int len, struct type *char_type) +{ + struct value *val; + int lowbound = current_language->string_lower_bound; + int highbound = len / TYPE_LENGTH (char_type); + struct type *rangetype = create_range_type ((struct type *) NULL, + builtin_type_int32, + lowbound, + highbound + lowbound - 1); + struct type *stringtype + = create_array_type ((struct type *) NULL, char_type, rangetype); + + val = allocate_value (stringtype); + memcpy (value_contents_raw (val), ptr, len); + return val; +} + /* Create a value for a string constant by allocating space in the inferior, copying the data into that space, and returning the address with type TYPE_CODE_STRING. PTR points to the string diff --git a/gdb/valprint.c b/gdb/valprint.c index b02e9df..32f9ded 100644 --- a/gdb/valprint.c +++ b/gdb/valprint.c @@ -919,7 +919,8 @@ print_hex_chars (struct ui_file *stream, const gdb_byte *valaddr, Omit any leading zero chars. 
*/ void -print_char_chars (struct ui_file *stream, const gdb_byte *valaddr, +print_char_chars (struct ui_file *stream, struct type *type, + const gdb_byte *valaddr, unsigned len, enum bfd_endian byte_order) { const gdb_byte *p; @@ -932,7 +933,7 @@ print_char_chars (struct ui_file *stream, const gdb_byte *valaddr, while (p < valaddr + len) { - LA_EMIT_CHAR (*p, stream, '\''); + LA_EMIT_CHAR (*p, type, stream, '\''); ++p; } } @@ -944,7 +945,7 @@ print_char_chars (struct ui_file *stream, const gdb_byte *valaddr, while (p >= valaddr) { - LA_EMIT_CHAR (*p, stream, '\''); + LA_EMIT_CHAR (*p, type, stream, '\''); --p; } } @@ -1315,7 +1316,8 @@ read_string (CORE_ADDR addr, int len, int width, unsigned int fetchlimit, whichever is smaller. */ int -val_print_string (CORE_ADDR addr, int len, int width, struct ui_file *stream, +val_print_string (struct type *elttype, CORE_ADDR addr, int len, + struct ui_file *stream, const struct value_print_options *options) { int force_ellipsis = 0; /* Force ellipsis to be printed if nonzero. */ @@ -1325,6 +1327,7 @@ val_print_string (CORE_ADDR addr, int len, int width, struct ui_file *stream, int bytes_read; gdb_byte *buffer = NULL; /* Dynamically growable fetch buffer. */ struct cleanup *old_chain = NULL; /* Top of the old cleanup chain. */ + int width = TYPE_LENGTH (elttype); /* First we need to figure out the limit on the number of characters we are going to attempt to fetch and print. This is actually pretty simple. 
If @@ -1378,7 +1381,7 @@ val_print_string (CORE_ADDR addr, int len, int width, struct ui_file *stream, { fputs_filtered (" ", stream); } - LA_PRINT_STRING (stream, buffer, bytes_read / width, width, force_ellipsis, options); + LA_PRINT_STRING (stream, elttype, buffer, bytes_read / width, force_ellipsis, options); } if (errcode != 0) diff --git a/gdb/valprint.h b/gdb/valprint.h index 8b65af6..90dcdc2 100644 --- a/gdb/valprint.h +++ b/gdb/valprint.h @@ -134,9 +134,10 @@ extern void print_decimal_chars (struct ui_file *, const gdb_byte *, extern void print_hex_chars (struct ui_file *, const gdb_byte *, unsigned int, enum bfd_endian); -extern void print_char_chars (struct ui_file *, const gdb_byte *, - unsigned int, enum bfd_endian); +extern void print_char_chars (struct ui_file *, struct type *, + const gdb_byte *, unsigned int, enum bfd_endian); int read_string (CORE_ADDR addr, int len, int width, unsigned int fetchlimit, gdb_byte **buffer, int *bytes_read); + #endif diff --git a/gdb/value.h b/gdb/value.h index aa43365..3dddeef 100644 --- a/gdb/value.h +++ b/gdb/value.h @@ -388,6 +388,8 @@ extern struct value *value_mark (void); extern void value_free_to_mark (struct value *mark); +extern struct value *value_typed_string (char *ptr, int len, + struct type *char_type); extern struct value *value_string (char *ptr, int len); extern struct value *value_bitstring (char *ptr, int len); @@ -619,7 +621,7 @@ extern int common_val_print (struct value *val, const struct value_print_options *options, const struct language_defn *language); -extern int val_print_string (CORE_ADDR addr, int len, int width, +extern int val_print_string (struct type *elttype, CORE_ADDR addr, int len, struct ui_file *stream, const struct value_print_options *options); |