[multiple changes]

2004-02-02 Eric Christopher <echristo@redhat.com> Zack Weinberg <zack@codesourcery.com> * c-opts.c (c_common_handle_option): Add -finput-charset. * c.opt: Ditto. * cppcharset.c (one_iso88591_to_utf8): Remove. (convert_iso88591_utf8): Ditto. (conversion_tab): Remove 8859-1 converter. (_cpp_input_to_utf8): Remove. (_cpp_init_iconv_buffer): Ditto. (_cpp_close_iconv_buffer): Ditto. (_cpp_convert_input): New function. (_cpp_default_encoding): Ditto. * cpphash.h: Add/remove prototypes for above. * cppfiles.c (read_file_guts): Use _cpp_convert_input. * cppinit.c (cpp_create_reader): Use _cpp_default_encoding for narrow execution and input character sets. * cpplib.c (cpp_push_buffer): Delete uses of removed functions. * doc/cppopts.texi: Document -finput-charset. 2004-02-02 Eric Christopher <echristo@redhat.com> Zack Weinberg <zack@codesourcery.com> * gcc.c-torture/execute/wchar_t-1.c: Add -finput-charset. 2004-01-29 Eric Christopher <echristo@redhat.com> Zack Weinberg <zack@codesourcery.com> * testsuite/22_locale/collate/compare/wchar_t/2.cc: Remove xfail. Use -finput-charset. * testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc: Ditto. * testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc: Ditto. * testsuite/22_locale/collate/hash/wchar_t/2.cc: Ditto. * testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc: Ditto. * testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc: Ditto. * testsuite/22_locale/collate/transform/wchar_t/2.cc: Ditto. * testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc: Ditto. * testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc: Ditto. From-SVN: r77136
author: Eric Christopher <echristo@gcc.gnu.org> 2004-02-02 20:20:58 +0000
committer: Eric Christopher <echristo@gcc.gnu.org> 2004-02-02 20:20:58 +0000
commit: 16dd5cfeb8364bb5b8ea60de17788a2d414c8972 (patch)
tree: ceb6203dee329b18b1ae6797a54fb24e65a13847 /gcc/cppcharset.c
parent: 6699d593734ee8f2cf7472ac581e123e54936be4 (diff)
download: gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.zip
gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.tar.gz
gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.tar.bz2
1 files changed, 44 insertions, 64 deletions
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 9fc180a..b73128d 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -446,31 +446,6 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
   return 0;
 }
 
-/* The first 256 code points of ISO 8859.1 have the same numeric
-   values as the first 256 code points of Unicode, therefore the
-   incoming ISO 8859.1 character can be passed directly to
-   one_cppchar_to_utf8 (which expects a Unicode value).  */
-
-static int
-one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
-		      size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
-{
-  const uchar *inbuf = *inbufp;
-  int rval;
-
-  if (*inbytesleftp > 1)
-    return EINVAL;
-
-  rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
-  if (rval)
-    return rval;
-
-  *inbufp += 1;
-  *inbytesleftp -= 1;
-
-  return 0;
-}
-
 /* Helper routine for the next few functions.  The 'const' on
    one_conversion means that we promise not to modify what function is
    pointed to, which lets the inliner see through it.  */
@@ -554,14 +529,6 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
   return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
 }
 
-static bool
-convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
-                       struct _cpp_strbuf *to)
-{
-  return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
-}
-
-
 /* Identity conversion, used when we have no alternative.  */
 static bool
 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -639,7 +606,6 @@ static const struct conversion conversion_tab[] = {
   { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
   { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
   { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
-  { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
 };
 
 /* Subroutine of cpp_init_iconv: initialize and return a
@@ -1388,44 +1354,58 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
 }
 
 uchar *
-_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
+		    uchar *input, size_t size, size_t len, off_t *st_size)
 {
-  struct _cpp_strbuf tbuf;
-  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+  struct cset_converter input_cset;
+  struct _cpp_strbuf to;
 
-  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
-  tbuf.text = xmalloc (tbuf.asize);
-  tbuf.len = 0;
+  input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
+  if (input_cset.func == convert_no_conversion)
+    {
+      to.text = input;
+      to.asize = size;
+      to.len = len;
+    }
+  else
+    {
+      to.asize = MAX (65536, len);
+      to.text = xmalloc (to.asize);
+      to.len = 0;
 
-  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
-   {
-      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
-      return NULL;
-   }
+      if (!APPLY_CONVERSION (input_cset, input, len, &to))
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "failure to convert %s to %s",
+		   CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
 
-  if (length)
-    tbuf.text[tbuf.len] = '\n';
-  else
-    tbuf.text[0] = '\n';
+      free (input);
+    }
 
-  return tbuf.text;
-}
+  /* Clean up the mess.  */
+  if (input_cset.func == convert_using_iconv)
+    iconv_close (input_cset.cd);
 
-  /* Check the input file format. At present assuming the input file
-     is in iso-8859-1 format. Convert this input character set to
-     source character set format (UTF-8). */
+  /* Resize buffer if we allocated substantially too much, or if we
+     haven't enough space for the \n-terminator.  */
+  if (to.len + 4096 < to.asize || to.len >= to.asize)
+    to.text = xrealloc (to.text, to.len + 1);
 
-void
-_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
-{
-  pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
-						    from);
+  to.text[to.len] = '\n';
+  *st_size = to.len;
+  return to.text;
 }
 
-void
-_cpp_close_iconv_buffer (cpp_reader *pfile)
+const char *
+_cpp_default_encoding (void)
 {
-  if (HAVE_ICONV
-      && pfile->buffer->input_cset_desc.func == convert_using_iconv)
-    iconv_close (pfile->buffer->input_cset_desc.cd);
+  const char *current_encoding = NULL;
+
+#if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET)
+  setlocale (LC_CTYPE, "");
+  current_encoding = nl_langinfo (CODESET);
+#endif
+  if (current_encoding == NULL || *current_encoding == '\0')
+    current_encoding = SOURCE_CHARSET;
+
+  return current_encoding;
 }
author	Eric Christopher <echristo@gcc.gnu.org>	2004-02-02 20:20:58 +0000
committer	Eric Christopher <echristo@gcc.gnu.org>	2004-02-02 20:20:58 +0000
commit	16dd5cfeb8364bb5b8ea60de17788a2d414c8972 (patch)
tree	ceb6203dee329b18b1ae6797a54fb24e65a13847 /gcc/cppcharset.c
parent	6699d593734ee8f2cf7472ac581e123e54936be4 (diff)
download	gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.zip gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.tar.gz gcc-16dd5cfeb8364bb5b8ea60de17788a2d414c8972.tar.bz2