diff options
author | Tom Tromey <tromey@redhat.com> | 2009-03-20 23:04:40 +0000 |
---|---|---|
committer | Tom Tromey <tromey@redhat.com> | 2009-03-20 23:04:40 +0000 |
commit | 6c7a06a3facfa3f71606655871d0617d3cad80d9 (patch) | |
tree | 34aa9f54c4d9d5748c654b09f2992a92e217f6cd /gdb/c-lang.c | |
parent | cb249c71f6bfb565578d4258eaab9144a45716fe (diff) | |
download | gdb-6c7a06a3facfa3f71606655871d0617d3cad80d9.zip gdb-6c7a06a3facfa3f71606655871d0617d3cad80d9.tar.gz gdb-6c7a06a3facfa3f71606655871d0617d3cad80d9.tar.bz2 |
gdb:
2009-03-19 Tom Tromey <tromey@redhat.com>
Julian Brown <julian@codesourcery.com>
PR i18n/7220, PR i18n/7821, PR exp/8815, PR exp/9103,
PR i18n/9401, PR exp/9613:
* NEWS: Update.
* value.h (value_typed_string): Declare.
(val_print_string): Update.
* valprint.h (print_char_chars): Update.
* valprint.c (print_char_chars): Add type argument. Update.
(val_print_string): Likewise.
* valops.c (value_typed_string): New function.
* utils.c (host_char_to_target): New function.
(parse_escape): Use host_char_to_target, host_hex_value. Update.
Remove '^' case.
(no_control_char_error): Remove.
* typeprint.c (print_type_scalar): Update.
* scm-valprint.c (scm_scmval_print): Update.
* scm-lang.h (scm_printchar, scm_printstr): Update.
* scm-lang.c (scm_printchar): Add type argument.
(scm_printstr): Likewise.
* printcmd.c (print_formatted): Update.
(print_scalar_formatted): Update.
(printf_command) <wide_string_arg, wide_char_arg>: New constants.
Handle '%lc' and '%ls'.
* parser-defs.h (struct typed_stoken): New type.
(struct stoken_vector): Likewise.
(write_exp_string_vector): Declare.
* parse.c (write_exp_string_vector): New function.
* p-valprint.c (pascal_val_print): Update.
* p-lang.h (is_pascal_string_type, pascal_printchar,
pascal_printstr): Update.
* p-lang.c (is_pascal_string_type): Remove 'char_size' argument.
Add 'char_type' argument.
(pascal_emit_char): Add type argument.
(pascal_printchar): Likewise.
(pascal_printstr): Likewise.
* objc-lang.c (objc_emit_char): Add type argument.
(objc_printchar): Likewise.
(objc_printstr): Likewise.
* macroexp.c (get_character_constant): Handle unicode characters.
Use c_parse_escape.
(get_string_literal): Handle unicode strings. Use
c_parse_escape.
* m2-valprint.c (print_unpacked_pointer): Update.
(m2_print_array_contents): Update.
(m2_val_print): Update.
* m2-lang.c (m2_emit_char): Add type argument.
(m2_printchar): Likewise.
(m2_printstr): Likewise.
* language.h (struct language_defn) <la_printchar>: Add type
argument.
<la_printstr, la_emitchar>: Likewise.
(LA_PRINT_CHAR): Likewise.
(LA_PRINT_STRING): Likewise.
(LA_EMIT_CHAR): Likewise.
* language.c (unk_lang_emit_char): Add type argument.
(unk_lang_printchar): Likewise.
(unk_lang_printstr): Likewise.
* jv-valprint.c (java_val_print): Update.
* jv-lang.c (java_emit_char): Add type argument.
* f-valprint.c (f_val_print): Update.
* f-lang.c (f_emit_char): Add type argument.
(f_printchar): Likewise.
(f_printstr): Likewise.
* expprint.c (print_subexp_standard): Update.
* charset.h (target_wide_charset): Declare.
(c_target_char_has_backslash_escape, c_parse_backslash,
host_char_print_literally, host_char_to_target,
target_char_to_host, target_char_to_control_char): Remove.
(enum transliterations): New type.
(convert_between_encodings): Declare.
(HOST_ESCAPE_CHAR): New define.
(host_letter_to_control_character, host_hex_value): Declare.
(enum wchar_iterate_result): New enum.
(struct wchar_iterator): Declare.
(make_wchar_iterator, make_cleanup_wchar_iterator, wchar_iterate,
wchar_push_back): Declare.
* charset-list.h: New file.
* c-valprint.c (textual_name): New function.
(textual_element_type): Handle wide character types.
(c_val_print): Pass original type to textual_element_type. Handle
wide character types.
(c_value_print): Use textual_element_type. Pass original type of
value to val_print.
* c-lang.h (enum c_string_type): New type.
(c_printchar, c_printstr): Update.
* c-lang.c (classify_type): New function.
(print_wchar): Likewise.
(c_emit_char): Add type argument. Handle wide characters.
(c_printchar): Likewise.
(c_printstr): Add type argument. Handle wide and multibyte
character sets.
(convert_ucn): New function.
(emit_numeric_character): Likewise.
(convert_octal): Likewise.
(convert_hex): Likewise.
(ADVANCE): New macro.
(convert_escape): New function.
(parse_one_string): Likewise.
(evaluate_subexp_c): Likewise.
(exp_descriptor_c): New global.
(c_language_defn): Use exp_descriptor_c.
(cplus_language_defn): Likewise.
(asm_language_defn): Likewise.
(minimal_language_defn): Likewise.
(charset_for_string_type): New function.
* c-exp.y (%union): Add 'svec' and 'tsval'.
(CHAR): New token.
(exp): Add CHAR production.
(string_exp): Rewrite.
(exp) <string_exp>: Rewrite.
(tempbuf): Now global.
(tempbuf_init): New global.
(parse_string_or_char): New function.
(yylex) <tempbuf>: Now global.
<tokptr, tempbufindex, tempbufsize, token_string, class_prefix>:
Remove.
Handle 'u', 'U', and 'L' prefixes. Call parse_string_or_char.
(c_parse_escape): New function.
* auxv.c (fprint_target_auxv): Update.
* ada-valprint.c (ada_emit_char): Add type argument.
(ada_printchar): Likewise.
(ada_print_scalar): Update.
(printstr): Add type argument. Update calls to ada_emit_char.
(ada_printstr): Add type argument.
(ada_val_print_array): Update.
(ada_val_print_1): Likewise.
* ada-lang.c (emit_char): Add type argument.
* ada-lang.h (ada_emit_char, ada_printchar, ada_printstr): Add
type arguments.
* gdb_locale.h: Include langinfo.h.
* charset.c (_initialize_charset): Set default host charset from
the locale. Don't register charsets. Add target-wide-charset
commands. Call find_charset_names.
(struct charset, struct translation): Remove.
(GDB_DEFAULT_HOST_CHARSET): Remove.
(GDB_DEFAULT_TARGET_WIDE_CHARSET): New define.
(target_wide_charset_name): New global.
(show_host_charset_name): Handle "auto".
(show_target_wide_charset_name): New function.
(host_charset_enum, target_charset_enum): Remove.
(charset_enum): New global.
(all_charsets, register_charset, lookup_charset, all_translations,
register_translation, lookup_translation): Remove.
(simple_charset, ascii_print_literally, ascii_to_control): Remove.
(iso_8859_print_literally, iso_8859_to_control,
iso_8859_family_charset): Remove.
(ebcdic_print_literally, ebcdic_to_control,
ebcdic_family_charset): Remove.
(struct cached_iconv, check_iconv_cache, cached_iconv_convert,
register_iconv_charsets): Remove.
(target_wide_charset_be_name, target_wide_charset_le_name): New
globals.
(identity_either_char_to_other): Remove.
(set_be_le_names, validate): New functions.
(backslashable, backslashed, represented): Remove.
(default_c_target_char_has_backslash_escape): Remove.
(default_c_parse_backslash, iconv_convert): Remove.
(ascii_to_iso_8859_1_table, ascii_to_ebcdic_us_table,
ascii_to_ibm1047_table, iso_8859_1_to_ascii_table,
iso_8859_1_to_ebcdic_us_table, iso_8859_1_to_ibm1047_table,
ebcdic_us_to_ascii_table, ebcdic_us_to_iso_8859_1_table,
ebcdic_us_to_ibm1047_table, ibm1047_to_ascii_table,
ibm1047_to_iso_8859_1_table, ibm1047_to_ebcdic_us_table): Remove.
(table_convert_char, table_translation, simple_table_translation):
Remove.
(current_host_charset, current_target_charset,
c_target_char_has_backslash_escape_func,
c_target_char_has_backslash_escape_baton): Remove.
(c_parse_backslash_func, c_parse_backslash_baton): Remove.
(host_char_to_target_func, host_char_to_target_baton): Remove.
(target_char_to_host_func, target_char_to_host_baton): Remove.
(cached_iconv_host_to_target, cached_iconv_target_to_host):
Remove.
(lookup_charset_or_error, check_valid_host_charset): Remove.
(set_host_and_target_charsets): Remove.
(set_host_charset, set_target_charset): Remove.
(set_host_charset_sfunc, set_target_charset_sfunc): Rewrite.
(set_target_wide_charset_sfunc): New function.
(show_charset): Print target wide character set.
(host_charset, target_charset): Rewrite.
(target_wide_charset): New function.
(c_target_char_has_backslash_escape): Remove.
(c_parse_backslash): Remove.
(host_letter_to_control_character): New function.
(host_char_print_literally): Remove.
(host_hex_value): New function.
(target_char_to_control_char): Remove.
(cleanup_iconv): New function.
(convert_between_encodings): New function.
(target_char_to_host): Remove.
(struct wchar_iterator): Define.
(make_wchar_iterator, make_cleanup_wchar_iterator, wchar_iterate,
wchar_push_back): New functions.
(do_cleanup_iterator): New function.
(char_ptr): New typedef.
(charsets): New global.
(add_one, find_charset_names): New functions.
(default_charset_names): New global.
(auto_host_charset_name): Likewise.
* aclocal.m4, config.in, configure: Rebuild.
* configure.ac: Call AM_LANGINFO_CODESET.
(GDB_DEFAULT_HOST_CHARSET): Default to UTF-8.
(AM_ICONV): Invoke earlier.
* acinclude.m4: Include codeset.m4. Subst LIBICONV_INCLUDE and
LIBICONV_LIBDIR. Check for libiconv in build tree.
* Makefile.in (LIBICONV_LIBDIR, LIBICONV_INCLUDE): New macros.
(INTERNAL_CFLAGS_BASE): Add LIBICONV_INCLUDE.
(INTERNAL_LDFLAGS): Add LIBICONV_LIBDIR.
* gdb_obstack.h (obstack_grow_wstr): New define.
* gdb_wchar.h: New file.
* defs.h: Include it.
gdb/testsuite:
* gdb.base/store.exp: Update for change to escape output.
* gdb.base/callfuncs.exp (fetch_all_registers): Update for change
to escape output.
* gdb.base/pointers.exp: Update for change to escape output.
* gdb.base/long_long.exp (gdb_test_long_long): Update for change
to escape output.
* gdb.base/constvars.exp (do_constvar_tests): Update for change to
escape output.
* gdb.base/call-rt-st.exp (print_struct_call): Update for change
to escape output.
* gdb.cp/ref-types.exp (gdb_start_again): Update for change to
escape output.
* gdb.base/setvar.exp: Update for change to escape output.
* lib/gdb.exp (default_gdb_start): Set LC_CTYPE to C.
* gdb.base/printcmds.exp (test_print_all_chars): Update for change
to escape output.
(test_print_string_constants): Likewise.
* gdb.base/charset.exp (valid_host_charset): Check size of
wchar_t. Handle UCS-2 and UCS-4. Add tests for wide and unicode
cases. Handle "auto"-related output.
* gdb.base/charset.c (char16_t, char32_t): New typedefs.
(uvar, Uvar): New globals.
gdb/doc:
* gdb.texinfo (Character Sets): Remove obsolete text. Document
set target-wide-charset.
(Requirements): Mention iconv.
Diffstat (limited to 'gdb/c-lang.c')
-rw-r--r-- | gdb/c-lang.c | 822 |
1 files changed, 750 insertions, 72 deletions
diff --git a/gdb/c-lang.c b/gdb/c-lang.c index 8b5410f..deab3f4 100644 --- a/gdb/c-lang.c +++ b/gdb/c-lang.c @@ -33,48 +33,304 @@ #include "demangle.h" #include "cp-abi.h" #include "cp-support.h" +#include "gdb_obstack.h" +#include <ctype.h> extern void _initialize_c_language (void); -static void c_emit_char (int c, struct ui_file * stream, int quoter); + +/* Given a C string type, STR_TYPE, return the corresponding target + character set name. */ + +static const char * +charset_for_string_type (enum c_string_type str_type) +{ + switch (str_type & ~C_CHAR) + { + case C_STRING: + return target_charset (); + case C_WIDE_STRING: + return target_wide_charset (); + case C_STRING_16: + /* FIXME: UCS-2 is not always correct. */ + if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG) + return "UCS-2BE"; + else + return "UCS-2LE"; + case C_STRING_32: + /* FIXME: UCS-4 is not always correct. */ + if (gdbarch_byte_order (current_gdbarch) == BFD_ENDIAN_BIG) + return "UCS-4BE"; + else + return "UCS-4LE"; + } + internal_error (__FILE__, __LINE__, "unhandled c_string_type"); +} + +/* Classify ELTTYPE according to what kind of character it is. Return + the enum constant representing the character type. Also set + *ENCODING to the name of the character set to use when converting + characters of this type to the host character set. */ + +static enum c_string_type +classify_type (struct type *elttype, const char **encoding) +{ + struct type *saved_type; + enum c_string_type result; + + /* We do one or two passes -- one on ELTTYPE, and then maybe a + second one on a typedef target. 
*/ + do + { + char *name = TYPE_NAME (elttype); + + if (TYPE_CODE (elttype) == TYPE_CODE_CHAR || !name) + { + result = C_CHAR; + goto done; + } + + if (!strcmp (name, "wchar_t")) + { + result = C_WIDE_CHAR; + goto done; + } + + if (!strcmp (name, "char16_t")) + { + result = C_CHAR_16; + goto done; + } + + if (!strcmp (name, "char32_t")) + { + result = C_CHAR_32; + goto done; + } + + saved_type = elttype; + CHECK_TYPEDEF (elttype); + } + while (elttype != saved_type); + + /* Punt. */ + result = C_CHAR; + + done: + *encoding = charset_for_string_type (result); + return result; +} + +/* Return true if print_wchar can display W without resorting to a + numeric escape, false otherwise. */ + +static int +wchar_printable (gdb_wchar_t w) +{ + return (gdb_iswprint (w) + || w == LCST ('\a') || w == LCST ('\b') + || w == LCST ('\f') || w == LCST ('\n') + || w == LCST ('\r') || w == LCST ('\t') + || w == LCST ('\v')); +} + +/* A helper function that converts the contents of STRING to wide + characters and then appends them to OUTPUT. */ + +static void +append_string_as_wide (const char *string, struct obstack *output) +{ + for (; *string; ++string) + { + gdb_wchar_t w = gdb_btowc (*string); + obstack_grow (output, &w, sizeof (gdb_wchar_t)); + } +} + +/* Print a wide character W to OUTPUT. ORIG is a pointer to the + original (target) bytes representing the character, ORIG_LEN is the + number of valid bytes. WIDTH is the number of bytes in a base + characters of the type. OUTPUT is an obstack to which wide + characters are emitted. QUOTER is a (narrow) character indicating + the style of quotes surrounding the character to be printed. + NEED_ESCAPE is an in/out flag which is used to track numeric + escapes across calls. 
*/ + +static void +print_wchar (gdb_wint_t w, const gdb_byte *orig, int orig_len, + int width, struct obstack *output, int quoter, + int *need_escapep) +{ + int need_escape = *need_escapep; + *need_escapep = 0; + if (gdb_iswprint (w) && (!need_escape || (!gdb_iswdigit (w) + && w != LCST ('8') + && w != LCST ('9')))) + { + if (w == gdb_btowc (quoter) || w == LCST ('\\')) + obstack_grow_wstr (output, LCST ("\\")); + obstack_grow (output, &w, sizeof (gdb_wchar_t)); + } + else + { + switch (w) + { + case LCST ('\a'): + obstack_grow_wstr (output, LCST ("\\a")); + break; + case LCST ('\b'): + obstack_grow_wstr (output, LCST ("\\b")); + break; + case LCST ('\f'): + obstack_grow_wstr (output, LCST ("\\f")); + break; + case LCST ('\n'): + obstack_grow_wstr (output, LCST ("\\n")); + break; + case LCST ('\r'): + obstack_grow_wstr (output, LCST ("\\r")); + break; + case LCST ('\t'): + obstack_grow_wstr (output, LCST ("\\t")); + break; + case LCST ('\v'): + obstack_grow_wstr (output, LCST ("\\v")); + break; + default: + { + int i; + + for (i = 0; i + width <= orig_len; i += width) + { + char octal[30]; + ULONGEST value = extract_unsigned_integer (&orig[i], width); + sprintf (octal, "\\%lo", (long) value); + append_string_as_wide (octal, output); + } + /* If we somehow have extra bytes, print them now. */ + while (i < orig_len) + { + char octal[5]; + sprintf (octal, "\\%.3o", orig[i] & 0xff); + append_string_as_wide (octal, output); + ++i; + } + + *need_escapep = 1; + } + break; + } + } +} /* Print the character C on STREAM as part of the contents of a literal string whose delimiter is QUOTER. Note that that format for printing characters and strings is language specific. 
*/ static void -c_emit_char (int c, struct ui_file *stream, int quoter) +c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) { - const char *escape; - int host_char; + struct obstack wchar_buf, output; + struct cleanup *cleanups; + const char *encoding; + gdb_byte *buf; + struct wchar_iterator *iter; + int need_escape = 0; - c &= 0xFF; /* Avoid sign bit follies */ + classify_type (type, &encoding); - escape = c_target_char_has_backslash_escape (c); - if (escape) - { - if (quoter == '"' && strcmp (escape, "0") == 0) - /* Print nulls embedded in double quoted strings as \000 to - prevent ambiguity. */ - fprintf_filtered (stream, "\\000"); - else - fprintf_filtered (stream, "\\%s", escape); - } - else if (target_char_to_host (c, &host_char) - && host_char_print_literally (host_char)) + buf = alloca (TYPE_LENGTH (type)); + pack_long (buf, type, c); + + iter = make_wchar_iterator (buf, TYPE_LENGTH (type), encoding, + TYPE_LENGTH (type)); + cleanups = make_cleanup_wchar_iterator (iter); + + /* This holds the printable form of the wchar_t data. */ + obstack_init (&wchar_buf); + make_cleanup_obstack_free (&wchar_buf); + + while (1) { - if (host_char == '\\' || host_char == quoter) - fputs_filtered ("\\", stream); - fprintf_filtered (stream, "%c", host_char); + int num_chars; + gdb_wchar_t *chars; + const gdb_byte *buf; + size_t buflen; + int print_escape = 1; + enum wchar_iterate_result result; + + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + if (num_chars < 0) + break; + if (num_chars > 0) + { + /* If all characters are printable, print them. Otherwise, + we're going to have to print an escape sequence. We + check all characters because we want to print the target + bytes in the escape sequence, and we don't know character + boundaries there. 
*/ + int i; + + print_escape = 0; + for (i = 0; i < num_chars; ++i) + if (!wchar_printable (chars[i])) + { + print_escape = 1; + break; + } + + if (!print_escape) + { + for (i = 0; i < num_chars; ++i) + print_wchar (chars[i], buf, buflen, TYPE_LENGTH (type), + &wchar_buf, quoter, &need_escape); + } + } + + /* This handles the NUM_CHARS == 0 case as well. */ + if (print_escape) + print_wchar (gdb_WEOF, buf, buflen, TYPE_LENGTH (type), &wchar_buf, + quoter, &need_escape); } - else - fprintf_filtered (stream, "\\%.3o", (unsigned int) c); + + /* The output in the host encoding. */ + obstack_init (&output); + make_cleanup_obstack_free (&output); + + convert_between_encodings ("wchar_t", host_charset (), + obstack_base (&wchar_buf), + obstack_object_size (&wchar_buf), + 1, &output, translit_char); + obstack_1grow (&output, '\0'); + + fputs_filtered (obstack_base (&output), stream); + + do_cleanups (cleanups); } void -c_printchar (int c, struct ui_file *stream) +c_printchar (int c, struct type *type, struct ui_file *stream) { + enum c_string_type str_type; + const char *encoding; + + str_type = classify_type (type, &encoding); + switch (str_type) + { + case C_CHAR: + break; + case C_WIDE_CHAR: + fputc_filtered ('L', stream); + break; + case C_CHAR_16: + fputc_filtered ('u', stream); + break; + case C_CHAR_32: + fputc_filtered ('U', stream); + break; + } + fputc_filtered ('\'', stream); - LA_EMIT_CHAR (c, stream, '\''); + LA_EMIT_CHAR (c, type, stream, '\''); fputc_filtered ('\'', stream); } @@ -85,87 +341,208 @@ c_printchar (int c, struct ui_file *stream) printing LENGTH characters, or if FORCE_ELLIPSES. 
*/ void -c_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); + struct obstack wchar_buf, output; + struct cleanup *cleanup; + enum c_string_type str_type; + const char *encoding; + struct wchar_iterator *iter; + int finished = 0; + int need_escape = 0; /* If the string was not truncated due to `set print elements', and the last byte of it is a null, we don't print that, in traditional C style. */ if (!force_ellipses && length > 0 - && (extract_unsigned_integer (string + (length - 1) * width, width) - == '\0')) + && (extract_unsigned_integer (string + (length - 1) * width, width) == 0)) length--; + str_type = classify_type (type, &encoding) & ~C_CHAR; + switch (str_type) + { + case C_STRING: + break; + case C_WIDE_STRING: + fputs_filtered ("L", stream); + break; + case C_STRING_16: + fputs_filtered ("u", stream); + break; + case C_STRING_32: + fputs_filtered ("U", stream); + break; + } + if (length == 0) { fputs_filtered ("\"\"", stream); return; } - for (i = 0; i < length && things_printed < options->print_max; ++i) + if (length == -1) + { + unsigned long current_char = 1; + for (i = 0; current_char; ++i) + { + QUIT; + current_char = extract_unsigned_integer (string + i * width, width); + } + length = i; + } + + /* Arrange to iterate over the characters, in wchar_t form. */ + iter = make_wchar_iterator (string, length * width, encoding, width); + cleanup = make_cleanup_wchar_iterator (iter); + + /* WCHAR_BUF is the obstack we use to represent the string in + wchar_t form. 
*/ + obstack_init (&wchar_buf); + make_cleanup_obstack_free (&wchar_buf); + + while (!finished && things_printed < options->print_max) { - /* Position of the character we are examining - to see whether it is repeated. */ - unsigned int rep1; - /* Number of repetitions we have detected so far. */ - unsigned int reps; - unsigned long current_char; + int num_chars; + enum wchar_iterate_result result; + gdb_wchar_t *chars; + const gdb_byte *buf; + size_t buflen; QUIT; if (need_comma) { - fputs_filtered (", ", stream); + obstack_grow_wstr (&wchar_buf, LCST (", ")); need_comma = 0; } - current_char = extract_unsigned_integer (string + i * width, width); + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + /* We only look at repetitions when we were able to convert a + single character in isolation. This makes the code simpler + and probably does the sensible thing in the majority of + cases. */ + while (num_chars == 1) + { + /* Count the number of repetitions. */ + unsigned int reps = 0; + gdb_wchar_t current_char = chars[0]; + const gdb_byte *orig_buf = buf; + int orig_len = buflen; - rep1 = i + 1; - reps = 1; - while (rep1 < length - && extract_unsigned_integer (string + rep1 * width, width) - == current_char) + if (need_comma) + { + obstack_grow_wstr (&wchar_buf, LCST (", ")); + need_comma = 0; + } + + while (num_chars == 1 && current_char == chars[0]) + { + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + ++reps; + } + + /* Emit CURRENT_CHAR according to the repetition count and + options. 
*/ + if (reps > options->repeat_count_threshold) + { + if (in_quotes) + { + if (options->inspect_it) + obstack_grow_wstr (&wchar_buf, LCST ("\\\", ")); + else + obstack_grow_wstr (&wchar_buf, LCST ("\", ")); + in_quotes = 0; + } + obstack_grow_wstr (&wchar_buf, LCST ("'")); + need_escape = 0; + print_wchar (current_char, orig_buf, orig_len, width, + &wchar_buf, '\'', &need_escape); + obstack_grow_wstr (&wchar_buf, LCST ("'")); + { + /* Painful gyrations. */ + int j; + char *s = xstrprintf (_(" <repeats %u times>"), reps); + for (j = 0; s[j]; ++j) + { + gdb_wchar_t w = gdb_btowc (s[j]); + obstack_grow (&wchar_buf, &w, sizeof (gdb_wchar_t)); + } + xfree (s); + } + things_printed += options->repeat_count_threshold; + need_comma = 1; + } + else + { + /* Saw the character one or more times, but fewer than + the repetition threshold. */ + if (!in_quotes) + { + if (options->inspect_it) + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); + else + obstack_grow_wstr (&wchar_buf, LCST ("\"")); + in_quotes = 1; + need_escape = 0; + } + + while (reps-- > 0) + { + print_wchar (current_char, orig_buf, orig_len, width, + &wchar_buf, '"', &need_escape); + ++things_printed; + } + } + } + + /* NUM_CHARS and the other outputs from wchar_iterate are valid + here regardless of which branch was taken above. */ + if (num_chars < 0) { - ++rep1; - ++reps; + /* Hit EOF. 
*/ + finished = 1; + break; } - if (reps > options->repeat_count_threshold) + switch (result) { - if (in_quotes) + case wchar_iterate_invalid: + if (!in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\", ", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); else - fputs_filtered ("\", ", stream); - in_quotes = 0; + obstack_grow_wstr (&wchar_buf, LCST ("\"")); + in_quotes = 1; } - LA_PRINT_CHAR (current_char, stream); - fprintf_filtered (stream, _(" <repeats %u times>"), reps); - i = rep1 - 1; - things_printed += options->repeat_count_threshold; - need_comma = 1; - } - else - { - if (!in_quotes) + need_escape = 0; + print_wchar (gdb_WEOF, buf, buflen, width, &wchar_buf, + '"', &need_escape); + break; + + case wchar_iterate_incomplete: + if (in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\",")); else - fputs_filtered ("\"", stream); - in_quotes = 1; + obstack_grow_wstr (&wchar_buf, LCST ("\",")); + in_quotes = 0; } - LA_EMIT_CHAR (current_char, stream, '"'); - ++things_printed; + obstack_grow_wstr (&wchar_buf, LCST (" <incomplete sequence ")); + print_wchar (gdb_WEOF, buf, buflen, width, &wchar_buf, + 0, &need_escape); + obstack_grow_wstr (&wchar_buf, LCST (">")); + finished = 1; + break; } } @@ -173,13 +550,27 @@ c_printstr (struct ui_file *stream, const gdb_byte *string, if (in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); else - fputs_filtered ("\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\"")); } - if (force_ellipses || i < length) - fputs_filtered ("...", stream); + if (force_ellipses || !finished) + obstack_grow_wstr (&wchar_buf, LCST ("...")); + + /* OUTPUT is where we collect `char's for printing. 
*/ + obstack_init (&output); + make_cleanup_obstack_free (&output); + + convert_between_encodings ("wchar_t", host_charset (), + obstack_base (&wchar_buf), + obstack_object_size (&wchar_buf), + 1, &output, translit_char); + obstack_1grow (&output, '\0'); + + fputs_filtered (obstack_base (&output), stream); + + do_cleanups (cleanup); } /* Obtain a C string from the inferior storing it in a newly allocated @@ -298,7 +689,285 @@ c_get_string (struct value *value, gdb_byte **buffer, int *length, } -/* Preprocessing and parsing C and C++ expressions. */ +/* Evaluating C and C++ expressions. */ + +/* Convert a UCN. The digits of the UCN start at P and extend no + farther than LIMIT. DEST_CHARSET is the name of the character set + into which the UCN should be converted. The results are written to + OUTPUT. LENGTH is the maximum length of the UCN, either 4 or 8. + Returns a pointer to just after the final digit of the UCN. */ + +static char * +convert_ucn (char *p, char *limit, const char *dest_charset, + struct obstack *output, int length) +{ + unsigned long result = 0; + gdb_byte data[4]; + int i; + + for (i = 0; i < length && p < limit && isxdigit (*p); ++i, ++p) + result = (result << 4) + host_hex_value (*p); + + for (i = 3; i >= 0; --i) + { + data[i] = result & 0xff; + result >>= 8; + } + + convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output, + translit_none); + + return p; +} + +/* Emit a character, VALUE, which was specified numerically, to + OUTPUT. TYPE is the target character type. */ + +static void +emit_numeric_character (struct type *type, unsigned long value, + struct obstack *output) +{ + gdb_byte *buffer; + + buffer = alloca (TYPE_LENGTH (type)); + pack_long (buffer, type, value); + obstack_grow (output, buffer, TYPE_LENGTH (type)); +} + +/* Convert an octal escape sequence. TYPE is the target character + type. The digits of the escape sequence begin at P and extend no + farther than LIMIT. The result is written to OUTPUT. 
Returns a + pointer to just after the final digit of the escape sequence. */ + +static char * +convert_octal (struct type *type, char *p, char *limit, struct obstack *output) +{ + unsigned long value = 0; + + while (p < limit && isdigit (*p) && *p != '8' && *p != '9') + { + value = 8 * value + host_hex_value (*p); + ++p; + } + + emit_numeric_character (type, value, output); + + return p; +} + +/* Convert a hex escape sequence. TYPE is the target character type. + The digits of the escape sequence begin at P and extend no farther + than LIMIT. The result is written to OUTPUT. Returns a pointer to + just after the final digit of the escape sequence. */ + +static char * +convert_hex (struct type *type, char *p, char *limit, struct obstack *output) +{ + unsigned long value = 0; + + while (p < limit && isxdigit (*p)) + { + value = 16 * value + host_hex_value (*p); + ++p; + } + + emit_numeric_character (type, value, output); + + return p; +} + +#define ADVANCE \ + do { \ + ++p; \ + if (p == limit) \ + error (_("Malformed escape sequence")); \ + } while (0) + +/* Convert an escape sequence to a target format. TYPE is the target + character type to use, and DEST_CHARSET is the name of the target + character set. The backslash of the escape sequence is at *P, and + the escape sequence will not extend past LIMIT. The results are + written to OUTPUT. Returns a pointer to just past the final + character of the escape sequence. */ + +static char * +convert_escape (struct type *type, const char *dest_charset, + char *p, char *limit, struct obstack *output) +{ + /* Skip the backslash. 
*/ + ADVANCE; + + switch (*p) + { + case '\\': + obstack_1grow (output, '\\'); + ++p; + break; + + case 'x': + ADVANCE; + if (!isxdigit (*p)) + error (_("\\x used with no following hex digits.")); + p = convert_hex (type, p, limit, output); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + p = convert_octal (type, p, limit, output); + break; + + case 'u': + case 'U': + { + int length = *p == 'u' ? 4 : 8; + ADVANCE; + if (!isxdigit (*p)) + error (_("\\u used with no following hex digits")); + p = convert_ucn (p, limit, dest_charset, output, length); + } + } + + return p; +} + +/* Given a single string from a (C-specific) OP_STRING list, convert + it to a target string, handling escape sequences specially. The + output is written to OUTPUT. DATA is the input string, which has + length LEN. DEST_CHARSET is the name of the target character set, + and TYPE is the type of target character to use. */ + +static void +parse_one_string (struct obstack *output, char *data, int len, + const char *dest_charset, struct type *type) +{ + char *limit; + + limit = data + len; + + while (data < limit) + { + char *p = data; + /* Look for next escape, or the end of the input. */ + while (p < limit && *p != '\\') + ++p; + /* If we saw a run of characters, convert them all. */ + if (p > data) + convert_between_encodings (host_charset (), dest_charset, + data, p - data, 1, output, translit_none); + /* If we saw an escape, convert it. */ + if (p < limit) + p = convert_escape (type, dest_charset, p, limit, output); + data = p; + } +} + +/* Expression evaluator for the C language family. Most operations + are delegated to evaluate_subexp_standard; see that function for a + description of the arguments. 
*/ + +static struct value * +evaluate_subexp_c (struct type *expect_type, struct expression *exp, + int *pos, enum noside noside) +{ + enum exp_opcode op = exp->elts[*pos].opcode; + + switch (op) + { + case OP_STRING: + { + int oplen, limit; + struct type *type; + struct obstack output; + struct cleanup *cleanup; + struct value *result; + enum c_string_type dest_type; + const char *dest_charset; + + obstack_init (&output); + cleanup = make_cleanup_obstack_free (&output); + + ++*pos; + oplen = longest_to_int (exp->elts[*pos].longconst); + + ++*pos; + limit = *pos + BYTES_TO_EXP_ELEM (oplen + 1); + dest_type + = (enum c_string_type) longest_to_int (exp->elts[*pos].longconst); + switch (dest_type & ~C_CHAR) + { + case C_STRING: + type = language_string_char_type (current_language, + current_gdbarch); + break; + case C_WIDE_STRING: + type = lookup_typename ("wchar_t", NULL, 0); + break; + case C_STRING_16: + type = lookup_typename ("char16_t", NULL, 0); + break; + case C_STRING_32: + type = lookup_typename ("char32_t", NULL, 0); + break; + default: + internal_error (__FILE__, __LINE__, "unhandled c_string_type"); + } + dest_charset = charset_for_string_type (dest_type); + + ++*pos; + while (*pos < limit) + { + int len; + + len = longest_to_int (exp->elts[*pos].longconst); + + ++*pos; + if (noside != EVAL_SKIP) + parse_one_string (&output, &exp->elts[*pos].string, len, + dest_charset, type); + *pos += BYTES_TO_EXP_ELEM (len); + } + + /* Skip the trailing length and opcode. */ + *pos += 2; + + if (noside == EVAL_SKIP) + return NULL; + + if ((dest_type & C_CHAR) != 0) + { + LONGEST value; + + if (obstack_object_size (&output) != TYPE_LENGTH (type)) + error (_("Could not convert character constant to target character set")); + value = unpack_long (type, obstack_base (&output)); + result = value_from_longest (type, value); + } + else + { + int i; + /* Write the terminating character. 
*/ + for (i = 0; i < TYPE_LENGTH (type); ++i) + obstack_1grow (&output, 0); + result = value_typed_string (obstack_base (&output), + obstack_object_size (&output), + type); + } + do_cleanups (cleanup); + return result; + } + break; + + default: + break; + } + return evaluate_subexp_standard (expect_type, exp, pos, noside); +} @@ -396,6 +1065,15 @@ c_language_arch_info (struct gdbarch *gdbarch, lai->bool_type_default = builtin->builtin_int; } +static const struct exp_descriptor exp_descriptor_c = +{ + print_subexp_standard, + operator_length_standard, + op_name_standard, + dump_subexp_body_standard, + evaluate_subexp_c +}; + const struct language_defn c_language_defn = { "c", /* Language name */ @@ -405,7 +1083,7 @@ const struct language_defn c_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -524,7 +1202,7 @@ const struct language_defn cplus_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -562,7 +1240,7 @@ const struct language_defn asm_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -605,7 +1283,7 @@ const struct language_defn minimal_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, |