aboutsummaryrefslogtreecommitdiff
path: root/gcc/cppcharset.c
diff options
context:
space:
mode:
authorEric Christopher <echristo@gcc.gnu.org>2004-01-16 22:37:49 +0000
committerEric Christopher <echristo@gcc.gnu.org>2004-01-16 22:37:49 +0000
commitcf551fbaca9987dccd728aa2cbaff67de7393fe3 (patch)
tree95190a526a0984976fac3a95910e9a5e961bea4e /gcc/cppcharset.c
parent2f9c39f8fc04b38b6aac1ad71c83b22cede8b9e9 (diff)
downloadgcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.zip
gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.tar.gz
gcc-cf551fbaca9987dccd728aa2cbaff67de7393fe3.tar.bz2
cppcharset.c (one_iso88591_to_utf8): New function.
2004-01-16 Eric Christopher <echristo@redhat.com> Chandrakala Chavva <cchavva@redhat.com> * cppcharset.c (one_iso88591_to_utf8): New function. (convert_iso88591_utf8): Ditto. Use. (conversion_tab): Use. (_cpp_input_to_utf8): New function. (_cpp_init_iconv_buffer): Ditto. (_cpp_close_iconv_buffer): Ditto. * cpphash.h: Prototype new functions. (cpp_buffer): Add input_cset_desc. * cppinit.c: Add input_charset default. * cpplib.c (cpp_push_buffer): Support init and close of iconv. * cpplib.h (cpp_options): Add input_charset. From-SVN: r76000
Diffstat (limited to 'gcc/cppcharset.c')
-rw-r--r--gcc/cppcharset.c91
1 files changed, 84 insertions, 7 deletions
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 1b2d0b2..5070366 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -170,7 +170,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
{
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
+
cppchar_t c;
const uchar *inbuf = *inbufp;
size_t nbytes, i;
@@ -274,7 +274,7 @@ one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
The return value is either 0 for success, or an errno value for
failure, which may be E2BIG (need more space), EILSEQ (ill-formed
input sequence), or EINVAL (incomplete input sequence). */
-
+
static inline int
one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
uchar **outbufp, size_t *outbytesleftp)
@@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0;
}
+/* Convert one ISO 8859-1 byte to UTF-8.  Latin-1 values equal the first
+   256 Unicode code points, so the byte goes straight to
+   one_cppchar_to_utf8.  Returns 0, EINVAL when no input is left, or
+   that function's error; input advances only on success.  */
+
+static int
+one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
+                      size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
+{
+  int rval;
+
+  /* *INBYTESLEFTP counts the bytes still to convert, so any non-zero
+     value is fine; only an empty input must be rejected.  */
+  if (*inbytesleftp < 1)
+    return EINVAL;
+
+  rval = one_cppchar_to_utf8 ((cppchar_t) **inbufp, outbufp, outbytesleftp);
+  if (rval)
+    return rval;
+
+  *inbufp += 1;
+  *inbytesleftp -= 1;
+  return 0;
+}
+
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@@ -489,7 +514,7 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
outbuf = to->text + to->asize - outbytesleft;
}
}
-
+
/* These functions convert entire strings between character sets.
They all have the signature
@@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
+/* Convert the FLEN bytes of ISO 8859-1 text at FROM to UTF-8 in TO by
+   running one_iso88591_to_utf8 through conversion_loop.  */
+static bool
+convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
+                       struct _cpp_strbuf *to)
+{
+  bool converted = conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
+
+  return converted;
+}
+
+
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
+ { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
@@ -619,7 +653,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
struct cset_converter ret;
char *pair;
size_t i;
-
+
if (!strcasecmp (to, from))
{
ret.func = convert_no_conversion;
@@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
if (ret.cd == (iconv_t) -1)
{
if (errno == EINVAL)
- cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+ cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
@@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
}
else
{
- cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+ cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
ret.func = convert_no_conversion;
@@ -1270,7 +1304,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
*unsignedp = unsigned_p;
return result;
}
-
+
/* Subroutine of cpp_interpret_charconst which performs the conversion
to a number, for wide strings. STR is the string structure returned
by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
@@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
return result;
}
+
+/* Convert LENGTH bytes at INPUT with the current buffer's
+   input_cset_desc converter and return the converted text in an
+   xmalloc'd block terminated by '\n'.  On conversion failure an error
+   is reported through cpp_error and NULL is returned.  The returned
+   block is not freed here; presumably the caller takes ownership.  */
+uchar *
+_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+{
+  struct _cpp_strbuf tbuf;
+  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+  size_t end;
+
+  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
+    {
+      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
+      /* Release the output buffer instead of leaking it.  */
+      free (tbuf.text);
+      return NULL;
+    }
+
+  /* The terminator goes right after the converted text (offset 0 for
+     empty input).  Nothing guarantees a spare byte in the allocation,
+     so grow it when the text fills it exactly.  */
+  end = length ? tbuf.len : 0;
+  if (end >= tbuf.asize)
+    {
+      tbuf.asize = end + 1;
+      tbuf.text = xrealloc (tbuf.text, tbuf.asize);
+    }
+  tbuf.text[end] = '\n';
+
+  return tbuf.text;
+}
+
+/* Install the input character set converter for the current buffer:
+   build a descriptor for converting from FROM to SOURCE_CHARSET (the
+   source character set, UTF-8) with init_iconv_desc and store it in
+   the buffer's input_cset_desc.  */
+
+void
+_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
+{
+  struct cset_converter desc = init_iconv_desc (pfile, SOURCE_CHARSET, from);
+
+  pfile->buffer->input_cset_desc = desc;
+}
+
+/* Close the iconv descriptor held in the current buffer's
+   input_cset_desc.  Nothing is closed unless HAVE_ICONV is nonzero and
+   the converter in use is convert_using_iconv.  */
+void
+_cpp_close_iconv_buffer (cpp_reader *pfile)
+{
+  if (HAVE_ICONV)
+    {
+      struct cset_converter *desc = &pfile->buffer->input_cset_desc;
+
+      if (desc->func == convert_using_iconv)
+        iconv_close (desc->cd);
+    }
+}