diff options
author | Eric Christopher <echristo@gcc.gnu.org> | 2004-02-02 20:20:58 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gcc.gnu.org> | 2004-02-02 20:20:58 +0000 |
commit | 16dd5cfeb8364bb5b8ea60de17788a2d414c8972 (patch) | |
tree | ceb6203dee329b18b1ae6797a54fb24e65a13847 /gcc/cppcharset.c | |
parent | 6699d593734ee8f2cf7472ac581e123e54936be4 (diff) | |
download | gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.zip gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.tar.gz gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.tar.bz2 |
[multiple changes]
2004-02-02 Eric Christopher <echristo@redhat.com>
Zack Weinberg <zack@codesourcery.com>
* c-opts.c (c_common_handle_option): Add -finput-charset.
* c.opt: Ditto.
* cppcharset.c (one_iso88591_to_utf8): Remove.
(convert_iso88591_utf8): Ditto.
(conversion_tab): Remove 8859-1 converter.
(_cpp_input_to_utf8): Remove.
(_cpp_init_iconv_buffer): Ditto.
(_cpp_close_iconv_buffer): Ditto.
(_cpp_convert_input): New function.
(_cpp_default_encoding): Ditto.
* cpphash.h: Add/remove prototypes for above.
* cppfiles.c (read_file_guts): Use _cpp_convert_input.
* cppinit.c (cpp_create_reader): Use _cpp_default_encoding
for narrow execution and input character sets.
* cpplib.c (cpp_push_buffer): Delete uses of removed functions.
* doc/cppopts.texi: Document -finput-charset.
2004-02-02 Eric Christopher <echristo@redhat.com>
Zack Weinberg <zack@codesourcery.com>
* gcc.c-torture/execute/wchar_t-1.c: Add -finput-charset.
2004-01-29 Eric Christopher <echristo@redhat.com>
Zack Weinberg <zack@codesourcery.com>
* testsuite/22_locale/collate/compare/wchar_t/2.cc: Remove xfail. Use
-finput-charset.
* testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc: Ditto.
* testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc: Ditto.
* testsuite/22_locale/collate/hash/wchar_t/2.cc: Ditto.
* testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc: Ditto.
* testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc: Ditto.
* testsuite/22_locale/collate/transform/wchar_t/2.cc: Ditto.
* testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc: Ditto.
* testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
Ditto.
From-SVN: r77136
Diffstat (limited to 'gcc/cppcharset.c')
-rw-r--r-- | gcc/cppcharset.c | 108 |
1 file changed, 44 insertions, 64 deletions
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c index 9fc180a..b73128d 100644 --- a/gcc/cppcharset.c +++ b/gcc/cppcharset.c @@ -446,31 +446,6 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp, return 0; } -/* The first 256 code points of ISO 8859.1 have the same numeric - values as the first 256 code points of Unicode, therefore the - incoming ISO 8859.1 character can be passed directly to - one_cppchar_to_utf8 (which expects a Unicode value). */ - -static int -one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp, - size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp) -{ - const uchar *inbuf = *inbufp; - int rval; - - if (*inbytesleftp > 1) - return EINVAL; - - rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp); - if (rval) - return rval; - - *inbufp += 1; - *inbytesleftp -= 1; - - return 0; -} - /* Helper routine for the next few functions. The 'const' on one_conversion means that we promise not to modify what function is pointed to, which lets the inliner see through it. */ @@ -554,14 +529,6 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen, return conversion_loop (one_utf32_to_utf8, cd, from, flen, to); } -static bool -convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen, - struct _cpp_strbuf *to) -{ - return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to); -} - - /* Identity conversion, used when we have no alternative. 
*/ static bool convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED, @@ -639,7 +606,6 @@ static const struct conversion conversion_tab[] = { { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 }, { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 }, { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 }, - { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 }, }; /* Subroutine of cpp_init_iconv: initialize and return a @@ -1388,44 +1354,58 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, } uchar * -_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length) +_cpp_convert_input (cpp_reader *pfile, const char *input_charset, + uchar *input, size_t size, size_t len, off_t *st_size) { - struct _cpp_strbuf tbuf; - struct cset_converter cvt = pfile->buffer->input_cset_desc; + struct cset_converter input_cset; + struct _cpp_strbuf to; - tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length); - tbuf.text = xmalloc (tbuf.asize); - tbuf.len = 0; + input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset); + if (input_cset.func == convert_no_conversion) + { + to.text = input; + to.asize = size; + to.len = len; + } + else + { + to.asize = MAX (65536, len); + to.text = xmalloc (to.asize); + to.len = 0; - if (!APPLY_CONVERSION (cvt, input, length, &tbuf)) - { - cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set."); - return NULL; - } + if (!APPLY_CONVERSION (input_cset, input, len, &to)) + cpp_error (pfile, CPP_DL_ERROR, + "failure to convert %s to %s", + CPP_OPTION (pfile, input_charset), SOURCE_CHARSET); - if (length) - tbuf.text[tbuf.len] = '\n'; - else - tbuf.text[0] = '\n'; + free (input); + } - return tbuf.text; -} + /* Clean up the mess. */ + if (input_cset.func == convert_using_iconv) + iconv_close (input_cset.cd); - /* Check the input file format. At present assuming the input file - is in iso-8859-1 format. Convert this input character set to - source character set format (UTF-8). 
*/ + /* Resize buffer if we allocated substantially too much, or if we + haven't enough space for the \n-terminator. */ + if (to.len + 4096 < to.asize || to.len >= to.asize) + to.text = xrealloc (to.text, to.len + 1); -void -_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from) -{ - pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET, - from); + to.text[to.len] = '\n'; + *st_size = to.len; + return to.text; } -void -_cpp_close_iconv_buffer (cpp_reader *pfile) +const char * +_cpp_default_encoding (void) { - if (HAVE_ICONV - && pfile->buffer->input_cset_desc.func == convert_using_iconv) - iconv_close (pfile->buffer->input_cset_desc.cd); + const char *current_encoding = NULL; + +#if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET) + setlocale (LC_CTYPE, ""); + current_encoding = nl_langinfo (CODESET); +#endif + if (current_encoding == NULL || *current_encoding == '\0') + current_encoding = SOURCE_CHARSET; + + return current_encoding; } |