cppcharset.c (one_iso88591_to_utf8): New function.

2004-01-16 Eric Christopher <echristo@redhat.com> Chandrakala Chavva <cchavva@redhat.com> * cppcharset.c (one_iso88591_to_utf8): New function. (convert_iso88591_utf8): Ditto. Use. (conversion_tab): Use. (_cpp_input_to_utf8): New function. (_cpp_init_iconv_buffer): Ditto. (_cpp_close_iconv_buffer): Ditto. * cpphash.h: Prototype new functions. (cpp_buffer): Add input_cset_desc. * cppinit.c: Add input_charset default. * cpplib.c (cpp_push_buffer): Support init and close of iconv. * cpplib.h (cpp_options): Add input_charset. From-SVN: r76000
author: Eric Christopher <echristo@gcc.gnu.org> 2004-01-16 22:37:49 +0000
committer: Eric Christopher <echristo@gcc.gnu.org> 2004-01-16 22:37:49 +0000
commit: cf551fbaca9987dccd728aa2cbaff67de7393fe3 (patch)
tree: 95190a526a0984976fac3a95910e9a5e961bea4e /gcc/cppcharset.c
parent: 2f9c39f8fc04b38b6aac1ad71c83b22cede8b9e9 (diff)
download: gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.zip
gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.tar.gz
gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.tar.bz2
1 files changed, 84 insertions, 7 deletions
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 1b2d0b2..5070366 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -170,7 +170,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
 {
   static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
   static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-  
+
   cppchar_t c;
   const uchar *inbuf = *inbufp;
   size_t nbytes, i;
@@ -274,7 +274,7 @@ one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
    The return value is either 0 for success, or an errno value for
    failure, which may be E2BIG (need more space), EILSEQ (ill-formed
    input sequence), ir EINVAL (incomplete input sequence).  */
-   
+
 static inline int
 one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
 		   uchar **outbufp, size_t *outbytesleftp)
@@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
   return 0;
 }
 
+/* Convert one ISO 8859.1 byte at *INBUFP to UTF-8 at *OUTBUFP.  Its
+   first 256 code points have the same values as in Unicode, so the
+   byte goes straight to one_cppchar_to_utf8.  Returns 0 on success,
+   EINVAL if no input is left, else one_cppchar_to_utf8's error.  */
+
+static int
+one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
+		      size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
+{
+  int rval;
+
+  /* Each byte is a whole character; fail only when none is left.  */
+  if (*inbytesleftp < 1)
+    return EINVAL;
+
+  rval = one_cppchar_to_utf8 ((cppchar_t) **inbufp, outbufp, outbytesleftp);
+  if (rval)
+    return rval;
+
+  /* Consume the input byte only once it has been converted.  */
+  *inbufp += 1;
+  *inbytesleftp -= 1;
+  return 0;
+}
+
 /* Helper routine for the next few functions.  The 'const' on
    one_conversion means that we promise not to modify what function is
    pointed to, which lets the inliner see through it.  */
@@ -489,7 +514,7 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
       outbuf = to->text + to->asize - outbytesleft;
     }
 }
-		 
+
 
 /* These functions convert entire strings between character sets.
    They all have the signature
@@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
   return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
 }
 
+/* Convert FLEN bytes of ISO 8859.1 text at FROM to UTF-8, into TO.  */
+static bool
+convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
+		       struct _cpp_strbuf *to)
+{
+  return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
+}
+
 /* Identity conversion, used when we have no alternative.  */
 static bool
 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
   { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
   { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
   { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
+  { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
 };
 
 /* Subroutine of cpp_init_iconv: initialize and return a
@@ -619,7 +653,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
   struct cset_converter ret;
   char *pair;
   size_t i;
-  
+
   if (!strcasecmp (to, from))
     {
       ret.func = convert_no_conversion;
@@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
       if (ret.cd == (iconv_t) -1)
 	{
 	  if (errno == EINVAL)
-	    cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+	    cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
 		       "conversion from %s to %s not supported by iconv",
 		       from, to);
 	  else
@@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
     }
   else
     {
-      cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+      cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
 		 "no iconv implementation, cannot convert from %s to %s",
 		 from, to);
       ret.func = convert_no_conversion;
@@ -1270,7 +1304,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
   *unsignedp = unsigned_p;
   return result;
 }
-			 
+
 /* Subroutine of cpp_interpret_charconst which performs the conversion
    to a number, for wide strings.  STR is the string structure returned
    by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
@@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
 
   return result;
 }
+
+/* Convert LENGTH bytes at INPUT using the current buffer's input
+   charset converter.  Returns the converted text in xmalloc'd storage,
+   terminated by '\n', or NULL after reporting a conversion failure.  */
+uchar *
+_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+{
+  struct _cpp_strbuf tbuf;
+  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+
+  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
+    {
+      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
+      free (tbuf.text);		/* Don't leak the partial output.  */
+      return NULL;
+    }
+  if (tbuf.len >= tbuf.asize)	/* No room left for the terminator.  */
+    tbuf.text = xrealloc (tbuf.text, tbuf.len + 1);
+  tbuf.text[length ? tbuf.len : 0] = '\n';
+  return tbuf.text;
+}
+
+/* Initialize the current buffer's input converter.  The input is in
+   character set FROM (callers presumably pass ISO 8859-1 at present)
+   and is to be converted to the source character set (UTF-8).  */
+void
+_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
+{
+  struct cset_converter desc = init_iconv_desc (pfile, SOURCE_CHARSET, from);
+
+  pfile->buffer->input_cset_desc = desc;
+}
+
+/* Close the current buffer's input converter if iconv is driving it.  */
+void
+_cpp_close_iconv_buffer (cpp_reader *pfile)
+{
+  if (HAVE_ICONV && pfile->buffer->input_cset_desc.func == convert_using_iconv)
+    iconv_close (pfile->buffer->input_cset_desc.cd);
+}
author	Eric Christopher <echristo@gcc.gnu.org>	2004-01-16 22:37:49 +0000
committer	Eric Christopher <echristo@gcc.gnu.org>	2004-01-16 22:37:49 +0000
commit	cf551fbaca9987dccd728aa2cbaff67de7393fe3 (patch)
tree	95190a526a0984976fac3a95910e9a5e961bea4e /gcc/cppcharset.c
parent	2f9c39f8fc04b38b6aac1ad71c83b22cede8b9e9 (diff)
download	gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.zip gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.tar.gz gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.tar.bz2