aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog24
-rw-r--r--gcc/cppcharset.c91
-rw-r--r--gcc/cpphash.h9
-rw-r--r--gcc/cppinit.c3
-rw-r--r--gcc/cpplib.c13
-rw-r--r--gcc/cpplib.h7
6 files changed, 129 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d7afc19..8a3d1fd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2004-01-16 Eric Christopher <echristo@redhat.com>
+ Chandrakala Chavva <cchavva@redhat.com>
+
+ * cppcharset.c (one_iso88591_to_utf8): New function.
+ (convert_iso88591_utf8): Ditto. Use one_iso88591_to_utf8.
+ (conversion_tab): Add ISO-8859-1/UTF-8 entry.
+ (_cpp_input_to_utf8): New function.
+ (_cpp_init_iconv_buffer): Ditto.
+ (_cpp_close_iconv_buffer): Ditto.
+ * cpphash.h: Prototype new functions.
+ (cpp_buffer): Add input_cset_desc.
+ * cppinit.c: Add input_charset default.
+ * cpplib.c (cpp_push_buffer): Call _cpp_init_iconv_buffer.
+ (_cpp_pop_buffer): Call _cpp_close_iconv_buffer.
+ * cpplib.h (cpp_options): Add input_charset.
+
2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
* system.h (ASM_OUTPUT_SECTION_NAME): Poison.
@@ -14,23 +30,23 @@
* fixinc/tests/base/sys/stat.h: Adapt for new hackname.
* fixinc/inclhack.def (alpha___extern_prefix,
- alpha___extern_prefix_standards): New hacks to obey
+ alpha___extern_prefix_standards): New hacks to obey
__PRAGMA_EXTERN_PREFIX.
* fixinc/tests/base/testing.h [ALPHA___EXTERN_PREFIX_CHECK]: New
test.
* fixinc/tests/base/standards.h: Likewise.
-
+
* fixincl/inclhack.def (alpha_pthread): Tweak to match more
variations.
New testcase.
* fixinc/tests/base/pthread.h: Handle it.
-
+
* fixincl/inclhack.def (bad_lval): Sort file list.
Add many missing files up to Tru64 UNIX V5.1B.
* gcc/fixinc/tests/base/libgen.h: Renamed to ...
* gcc/fixinc/tests/base/dirent.h: ... this to match new file list
order.
-
+
* fixinc/fixincl.x: Regenerate.
2004-01-16 Mark Mitchell <mark@codesourcery.com>
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 1b2d0b2..5070366 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -170,7 +170,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
{
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
+
cppchar_t c;
const uchar *inbuf = *inbufp;
size_t nbytes, i;
@@ -274,7 +274,7 @@ one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
The return value is either 0 for success, or an errno value for
failure, which may be E2BIG (need more space), EILSEQ (ill-formed
input sequence), or EINVAL (incomplete input sequence). */
-
+
static inline int
one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
uchar **outbufp, size_t *outbytesleftp)
@@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0;
}
+/* The first 256 code points of ISO 8859.1 have the same numeric
+   values as the first 256 code points of Unicode, therefore the
+   incoming ISO 8859.1 character can be passed directly to
+   one_cppchar_to_utf8 (which expects a Unicode value).
+
+   Convert the single byte at *INBUFP by handing it, together with
+   OUTBUFP and OUTBYTESLEFTP, to one_cppchar_to_utf8.  On success
+   return 0, advance *INBUFP by one byte and decrement *INBYTESLEFTP
+   to match.  Return EINVAL if no input remains.  If
+   one_cppchar_to_utf8 fails, return its error code and leave the
+   input position untouched.  BIGEND is unused; it is present so the
+   signature matches the other one_* converters.  */
+
+static int
+one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
+                      size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
+{
+  const uchar *inbuf = *inbufp;
+  int rval;
+
+  /* Every ISO 8859.1 character is exactly one byte, so the only
+     possible input error is having nothing left to read.  Testing
+     for "more than one byte left" here would reject every input
+     longer than one byte and let an empty input read past the end
+     and wrap the unsigned counter below.  */
+  if (*inbytesleftp < 1)
+    return EINVAL;
+
+  rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
+  if (rval)
+    return rval;
+
+  *inbufp += 1;
+  *inbytesleftp -= 1;
+
+  return 0;
+}
+
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@@ -489,7 +514,7 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
outbuf = to->text + to->asize - outbytesleft;
}
}
-
+
/* These functions convert entire strings between character sets.
They all have the signature
@@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
+/* Convert FLEN bytes at FROM from ISO 8859.1 to UTF-8 by running
+   one_iso88591_to_utf8 through conversion_loop, with TO as the
+   output buffer.  CD is handed to conversion_loop unchanged.
+   Returns whatever conversion_loop returns.  */
+static bool
+convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
+ struct _cpp_strbuf *to)
+{
+ return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
+}
+
+
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
+ { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
@@ -619,7 +653,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
struct cset_converter ret;
char *pair;
size_t i;
-
+
if (!strcasecmp (to, from))
{
ret.func = convert_no_conversion;
@@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
if (ret.cd == (iconv_t) -1)
{
if (errno == EINVAL)
- cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+ cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv",
from, to);
else
@@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
}
else
{
- cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+ cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s",
from, to);
ret.func = convert_no_conversion;
@@ -1270,7 +1304,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
*unsignedp = unsigned_p;
return result;
}
-
+
/* Subroutine of cpp_interpret_charconst which performs the conversion
to a number, for wide strings. STR is the string structure returned
by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
@@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
return result;
}
+
+/* Convert LENGTH bytes at INPUT using the converter stored in the
+   current buffer (PFILE->buffer->input_cset_desc), and store a
+   newline after the converted text.  Returns the converted text in a
+   buffer obtained from xmalloc, or NULL, after reporting an error
+   through cpp_error, if the conversion fails.  */
+
+uchar *
+_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+{
+  struct _cpp_strbuf tbuf;
+  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+  size_t end;
+
+  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
+    {
+      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
+      /* Nothing else refers to the scratch buffer, so release it
+         instead of leaking it on the error path.  */
+      free (tbuf.text);
+      return NULL;
+    }
+
+  /* Empty input gets its newline at the very start of the buffer.  */
+  end = length ? tbuf.len : 0;
+
+  /* Nothing above reserves a byte for the newline, so the converted
+     text may fill the allocation exactly; grow it before storing.  */
+  if (end >= tbuf.asize)
+    {
+      tbuf.asize = end + 1;
+      tbuf.text = xrealloc (tbuf.text, tbuf.asize);
+    }
+  tbuf.text[end] = '\n';
+
+  return tbuf.text;
+}
+
+/* Set up the current buffer's input_cset_desc with the converter
+   that init_iconv_desc returns for translating from the character
+   set named FROM to SOURCE_CHARSET, the source character set
+   (UTF-8).  PFILE->buffer must already point at the buffer being
+   initialized, since the result is stored through it.  */
+
+void
+_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
+{
+ pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
+ from);
+}
+
+/* Close the iconv descriptor held in the current buffer's
+   input_cset_desc.  Does nothing unless HAVE_ICONV is set and the
+   descriptor's conversion function is convert_using_iconv.  */
+
+void
+_cpp_close_iconv_buffer (cpp_reader *pfile)
+{
+  if (!HAVE_ICONV)
+    return;
+
+  if (pfile->buffer->input_cset_desc.func != convert_using_iconv)
+    return;
+
+  iconv_close (pfile->buffer->input_cset_desc.cd);
+}
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 80cb04c..6c13ea1 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -270,7 +270,7 @@ struct cpp_buffer
const uchar *cur; /* Current location. */
const uchar *line_base; /* Start of current physical line. */
const uchar *next_line; /* Start of to-be-cleaned logical line. */
-
+
const uchar *buf; /* Entire character buffer. */
const uchar *rlimit; /* Writable byte at end of file. */
@@ -313,6 +313,10 @@ struct cpp_buffer
/* Used for buffer overlays by cpptrad.c. */
const uchar *saved_cur, *saved_rlimit;
+
+ /* Descriptor for converting from the input character set to the
+ source character set. */
+ struct cset_converter input_cset_desc;
};
/* A cpp_reader encapsulates the "state" of a pre-processor run.
@@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *);
+extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
+extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
+extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
diff --git a/gcc/cppinit.c b/gcc/cppinit.c
index 1332688..629da27 100644
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
CPP_OPTION (pfile, narrow_charset) = 0;
CPP_OPTION (pfile, wide_charset) = 0;
+ /* Default the input character set to iso-8859-1 for now. */
+ CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
+
/* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we
don't want to prepend anything at all to filenames using it. All
diff --git a/gcc/cpplib.c b/gcc/cpplib.c
index 2b213cb..feb8717 100644
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -549,14 +549,14 @@ do_undef (cpp_reader *pfile)
/* Undefine a single macro/assertion/whatever. */
static int
-undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
+undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
void *data_p ATTRIBUTE_UNUSED)
{
switch (h->type)
{
case NT_VOID:
break;
-
+
case NT_MACRO:
if (pfile->cb.undef)
(*pfile->cb.undef) (pfile, pfile->directive_line, h);
@@ -855,7 +855,7 @@ do_linemarker (cpp_reader *pfile)
cpp_string s = { 0, 0 };
if (_cpp_interpret_string_notranslate (pfile, &token->val.str, &s))
new_file = (const char *)s.text;
-
+
new_sysp = 0;
flag = read_flag (pfile, 0);
if (flag == 1)
@@ -1159,7 +1159,7 @@ do_pragma (cpp_reader *pfile)
(*p->u.handler) (pfile);
if (pfile->cb.line_change)
(*pfile->cb.line_change) (pfile, pfile->cur_token, false);
-
+
}
else if (pfile->cb.def_pragma)
{
@@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
int from_stage3)
{
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
+ const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer));
@@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
new->need_line = true;
pfile->buffer = new;
+ _cpp_init_iconv_buffer (pfile, input);
+
return new;
}
@@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
/* In case of a missing #endif. */
pfile->state.skipping = 0;
+ _cpp_close_iconv_buffer (pfile);
+
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev;
diff --git a/gcc/cpplib.h b/gcc/cpplib.h
index 5f18924..f7e12d2 100644
--- a/gcc/cpplib.h
+++ b/gcc/cpplib.h
@@ -332,6 +332,9 @@ struct cpp_options
/* Holds the name of the target wide character set. */
const char *wide_charset;
+ /* Holds the name of the input character set. */
+ const char *input_charset;
+
/* True to warn about precompiled header files we couldn't use. */
bool warn_invalid_pch;
@@ -417,7 +420,7 @@ struct cpp_dir
/* Mapping of file names for this directory for MS-DOS and related
platforms. A NULL-terminated array of (from, to) pairs. */
const char **name_map;
-
+
/* The C front end uses these to recognize duplicated
directories in the search path. */
ino_t ino;
@@ -481,7 +484,7 @@ struct cpp_hashnode GTY(())
{
struct ht_identifier ident;
unsigned int is_directive : 1;
- unsigned int directive_index : 7; /* If is_directive,
+ unsigned int directive_index : 7; /* If is_directive,
then index into directive table.
Otherwise, a NODE_OPERATOR. */
unsigned char rid_code; /* Rid code - for front ends. */