aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog35
-rw-r--r--gcc/c-opts.c4
-rw-r--r--gcc/c.opt5
-rw-r--r--gcc/cppcharset.c108
-rw-r--r--gcc/cppfiles.c55
-rw-r--r--gcc/cpphash.h9
-rw-r--r--gcc/cppinit.c10
-rw-r--r--gcc/cpplib.c4
-rw-r--r--gcc/doc/cppopts.texi10
-rw-r--r--gcc/testsuite/ChangeLog7
-rw-r--r--gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c1
11 files changed, 130 insertions, 118 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 45b7679..740bc21 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2004-02-02 Eric Christopher <echristo@redhat.com>
+ Zack Weinberg <zack@codesourcery.com>
+
+ * c-opts.c (c_common_handle_option): Add -finput-charset.
+ * c.opt: Ditto.
+ * cppcharset.c (one_iso88591_to_utf8): Remove.
+ (convert_iso88591_utf8): Ditto.
+ (conversion_tab): Remove 8859-1 converter.
+ (_cpp_input_to_utf8): Remove.
+ (_cpp_init_iconv_buffer): Ditto.
+ (_cpp_close_iconv_buffer): Ditto.
+ (_cpp_convert_input): New function.
+ (_cpp_default_encoding): Ditto.
+ * cpphash.h: Add/remove prototypes for above.
+ * cppfiles.c (read_file_guts): Use _cpp_convert_input.
+ * cppinit.c (cpp_create_reader): Use _cpp_default_encoding
+ for narrow execution and input character sets.
+ * cpplib.c (cpp_push_buffer): Delete uses of removed functions.
+ * doc/cppopts.texi: Document -finput-charset.
+
2004-02-02 David Edelsohn <edelsohn@gnu.org>
* rtlanal.c (refers_to_regno_p): Test regno, not inner_regno,
@@ -86,7 +106,7 @@
* postreload.c (reload_combine, reload_combine_note_store,
reload_combine_note_use, reload_cse_move2add, move2add_note_store): Likewise.
* ra-colorize.c (combine, color_usable_p, get_free_reg,
- calculate_dont_begin, calculate_dont_begin, colorize_one_web,
+ calculate_dont_begin, calculate_dont_begin, colorize_one_web,
try_recolor_web, insert_coalesced_conflicts, check_colors,
break_precolored_alias): Likewise.
* ra-debug.c: Include regs.h
@@ -210,7 +230,7 @@
constant.
(write_node): Simplify comparisons against small constants
before printing tests.
-
+
2004-01-31 Kazu Hirata <kazu@cs.umass.edu>
* config/m32r/m32r.c (m32r_load_pic_register): Use GEN_INT
@@ -435,8 +455,8 @@
(mark_constant_pool): Use new pool datastructures.
2004-01-30 Fariborz Jahanian <fjahanian@apple.com>
-
- * config/rs6000/rs6000.c (rs6000_emit_move): Remove #if 0.
+
+ * config/rs6000/rs6000.c (rs6000_emit_move): Remove #if 0.
Copy operands[1] to pseudo for simplify_gen_subreg.
2004-01-30 Kazu Hirata <kazu@cs.umass.edu>
@@ -447,7 +467,7 @@
2004-01-30 Andrew Pinski <pinskia@physics.uc.edu>
* toplev.c: Include alloc-pool.h.
- * Makefile.in (toplev.c): Update dependencies.
+ * Makefile.in (toplev.c): Update dependencies.
2004-01-30 Richard Kenner <kenner@vlsi1.ultra.nyu.edu>
@@ -533,7 +553,7 @@
2004-01-29 Devang Patel <dpatel@apple.com>
* dwarf2out.c (gen_field_die): Do not equate decl number to die.
-
+
2004-01-28 Ian Lance Taylor <ian@wasabisystems.com>
PR inline-asm/6162
@@ -682,7 +702,7 @@
target macros.
2004-01-28 Daniel Berlin <dberlin@dberlin.org>
-
+
* timevar.c (timevar_print): Mention when checking is enabled.
2004-01-28 Giovanni Bajo <giovannibajo@gcc.gnu.org>
@@ -3984,4 +4004,3 @@
* invoke.texi (-O1): Document change.
See ChangeLog.10 for earlier changes.
-
diff --git a/gcc/c-opts.c b/gcc/c-opts.c
index 2df0a7e..c89ba00 100644
--- a/gcc/c-opts.c
+++ b/gcc/c-opts.c
@@ -904,6 +904,10 @@ c_common_handle_option (size_t scode, const char *arg, int value)
cpp_opts->wide_charset = arg;
break;
+ case OPT_finput_charset_:
+ cpp_opts->input_charset = arg;
+ break;
+
case OPT_ftemplate_depth_:
max_tinst_depth = value;
break;
diff --git a/gcc/c.opt b/gcc/c.opt
index 4f9859f..4856fc5 100644
--- a/gcc/c.opt
+++ b/gcc/c.opt
@@ -482,6 +482,11 @@ fexec-charset=
C ObjC C++ ObjC++ Joined RejectNegative
-fexec-charset=<cset> Convert all strings and character constants to character set <cset>
+finput-charset=
+C ObjC C++ ObjC++ Joined RejectNegative
+-finput-charset=<cset> Specify the default character set for source files.
+
+
fexternal-templates
C++ ObjC++
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 9fc180a..b73128d 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -446,31 +446,6 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0;
}
-/* The first 256 code points of ISO 8859.1 have the same numeric
- values as the first 256 code points of Unicode, therefore the
- incoming ISO 8859.1 character can be passed directly to
- one_cppchar_to_utf8 (which expects a Unicode value). */
-
-static int
-one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
- size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
-{
- const uchar *inbuf = *inbufp;
- int rval;
-
- if (*inbytesleftp > 1)
- return EINVAL;
-
- rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
- if (rval)
- return rval;
-
- *inbufp += 1;
- *inbytesleftp -= 1;
-
- return 0;
-}
-
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@@ -554,14 +529,6 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
-static bool
-convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
- struct _cpp_strbuf *to)
-{
- return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
-}
-
-
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -639,7 +606,6 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
- { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
@@ -1388,44 +1354,58 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
}
uchar *
-_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
+ uchar *input, size_t size, size_t len, off_t *st_size)
{
- struct _cpp_strbuf tbuf;
- struct cset_converter cvt = pfile->buffer->input_cset_desc;
+ struct cset_converter input_cset;
+ struct _cpp_strbuf to;
- tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
- tbuf.text = xmalloc (tbuf.asize);
- tbuf.len = 0;
+ input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
+ if (input_cset.func == convert_no_conversion)
+ {
+ to.text = input;
+ to.asize = size;
+ to.len = len;
+ }
+ else
+ {
+ to.asize = MAX (65536, len);
+ to.text = xmalloc (to.asize);
+ to.len = 0;
- if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
- {
- cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
- return NULL;
- }
+ if (!APPLY_CONVERSION (input_cset, input, len, &to))
+ cpp_error (pfile, CPP_DL_ERROR,
+ "failure to convert %s to %s",
+ CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
- if (length)
- tbuf.text[tbuf.len] = '\n';
- else
- tbuf.text[0] = '\n';
+ free (input);
+ }
- return tbuf.text;
-}
+ /* Clean up the mess. */
+ if (input_cset.func == convert_using_iconv)
+ iconv_close (input_cset.cd);
- /* Check the input file format. At present assuming the input file
- is in iso-8859-1 format. Convert this input character set to
- source character set format (UTF-8). */
+ /* Resize buffer if we allocated substantially too much, or if we
+ haven't enough space for the \n-terminator. */
+ if (to.len + 4096 < to.asize || to.len >= to.asize)
+ to.text = xrealloc (to.text, to.len + 1);
-void
-_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
-{
- pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
- from);
+ to.text[to.len] = '\n';
+ *st_size = to.len;
+ return to.text;
}
-void
-_cpp_close_iconv_buffer (cpp_reader *pfile)
+const char *
+_cpp_default_encoding (void)
{
- if (HAVE_ICONV
- && pfile->buffer->input_cset_desc.func == convert_using_iconv)
- iconv_close (pfile->buffer->input_cset_desc.cd);
+ const char *current_encoding = NULL;
+
+#if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET)
+ setlocale (LC_CTYPE, "");
+ current_encoding = nl_langinfo (CODESET);
+#endif
+ if (current_encoding == NULL || *current_encoding == '\0')
+ current_encoding = SOURCE_CHARSET;
+
+ return current_encoding;
}
diff --git a/gcc/cppfiles.c b/gcc/cppfiles.c
index cd735f5..c886c89 100644
--- a/gcc/cppfiles.c
+++ b/gcc/cppfiles.c
@@ -312,7 +312,7 @@ pch_open_file (cpp_reader *pfile, _cpp_file *file, bool *invalid_pch)
/* Try to open the path FILE->name appended to FILE->dir. This is
where remap and PCH intercept the file lookup process. Return true
- if the file was found, whether or not the open was successful.
+ if the file was found, whether or not the open was successful.
Set *INVALID_PCH to true if a PCH file is found but wasn't valid. */
static bool
@@ -398,10 +398,10 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir, bool f
open_file_failed (pfile, file);
if (invalid_pch)
{
- cpp_error (pfile, CPP_DL_ERROR,
+ cpp_error (pfile, CPP_DL_ERROR,
"one or more PCH files were found, but they were invalid");
if (!cpp_get_options (pfile)->warn_invalid_pch)
- cpp_error (pfile, CPP_DL_ERROR,
+ cpp_error (pfile, CPP_DL_ERROR,
"use -Winvalid-pch for more information");
}
break;
@@ -457,7 +457,7 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
ssize_t size, total, count;
uchar *buf;
bool regular;
-
+
if (S_ISBLK (file->st.st_mode))
{
cpp_error (pfile, CPP_DL_ERROR, "%s is a block device", file->path);
@@ -514,15 +514,8 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
cpp_error (pfile, CPP_DL_WARNING,
"%s is shorter than expected", file->path);
- /* Shrink buffer if we allocated substantially too much. */
- if (total + 4096 < size)
- buf = xrealloc (buf, total + 1);
-
- /* The lexer requires that the buffer be \n-terminated. */
- buf[total] = '\n';
-
- file->buffer = buf;
- file->st.st_size = total;
+ file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),
+ buf, size, total, &file->st.st_size);
file->buffer_valid = true;
return true;
@@ -566,7 +559,7 @@ should_stack_file (cpp_reader *pfile, _cpp_file *file, bool import)
if (file->once_only)
return false;
- /* We must mark the file once-only if #import now, before header
+ /* We must mark the file once-only if #import now, before header
guard checks. Otherwise, undefining the header guard might
cause the file to be re-stacked. */
if (import)
@@ -1283,7 +1276,7 @@ struct pchf_data {
This is used as an optimisation, it means we don't have to search
the structure if we're processing a regular #include. */
bool have_once_only;
-
+
struct pchf_entry {
/* The size of this file. This is used to save running a MD5 checksum
if the sizes don't match. */
@@ -1298,7 +1291,7 @@ struct pchf_data {
static struct pchf_data *pchf;
/* Data for pchf_addr. */
-struct pchf_adder_info
+struct pchf_adder_info
{
cpp_reader *pfile;
struct pchf_data *d;
@@ -1322,11 +1315,11 @@ pchf_adder (void **slot, void *data)
the PCH file shouldn't be written... */
if (f->dont_read || f->err_no)
return 1;
-
+
d->entries[count].once_only = f->once_only;
d->have_once_only |= f->once_only;
if (f->buffer_valid)
- md5_buffer ((const char *)f->buffer,
+ md5_buffer ((const char *)f->buffer,
f->st.st_size, d->entries[count].sum);
else
{
@@ -1365,22 +1358,22 @@ _cpp_save_file_entries (cpp_reader *pfile, FILE *f)
struct pchf_data *result;
size_t result_size;
struct pchf_adder_info pai;
-
+
count = htab_elements (pfile->file_hash);
- result_size = (sizeof (struct pchf_data)
+ result_size = (sizeof (struct pchf_data)
+ sizeof (struct pchf_entry) * (count - 1));
result = xcalloc (result_size, 1);
-
+
result->count = 0;
result->have_once_only = false;
-
+
pai.pfile = pfile;
pai.d = result;
htab_traverse (pfile->file_hash, pchf_adder, &pai);
result_size = (sizeof (struct pchf_data)
+ sizeof (struct pchf_entry) * (result->count - 1));
-
+
qsort (result->entries, result->count, sizeof (struct pchf_entry),
pchf_save_compare);
@@ -1393,11 +1386,11 @@ bool
_cpp_read_file_entries (cpp_reader *pfile ATTRIBUTE_UNUSED, FILE *f)
{
struct pchf_data d;
-
+
if (fread (&d, sizeof (struct pchf_data) - sizeof (struct pchf_entry), 1, f)
!= 1)
return false;
-
+
pchf = xmalloc (sizeof (struct pchf_data)
+ sizeof (struct pchf_entry) * (d.count - 1));
memcpy (pchf, &d, sizeof (struct pchf_data) - sizeof (struct pchf_entry));
@@ -1422,7 +1415,7 @@ struct pchf_compare_data
/* Do we need to worry about entries that don't have ONCE_ONLY set? */
bool check_included;
-
+
/* The file that we're searching for. */
_cpp_file *f;
};
@@ -1435,15 +1428,15 @@ pchf_compare (const void *d_p, const void *e_p)
const struct pchf_entry *e = (const struct pchf_entry *)e_p;
struct pchf_compare_data *d = (struct pchf_compare_data *)d_p;
int result;
-
+
result = memcmp (&d->size, &e->size, sizeof (off_t));
if (result != 0)
return result;
-
+
if (! d->sum_computed)
{
_cpp_file *const f = d->f;
-
+
md5_buffer ((const char *)f->buffer, f->st.st_size, d->sum);
d->sum_computed = true;
}
@@ -1458,7 +1451,7 @@ pchf_compare (const void *d_p, const void *e_p)
return 1;
}
-/* Check that F is not in a list read from a PCH file (if any).
+/* Check that F is not in a list read from a PCH file (if any).
Assumes that f->buffer_valid is true. Return TRUE if the file
should not be read. */
@@ -1468,7 +1461,7 @@ check_file_against_entries (cpp_reader *pfile ATTRIBUTE_UNUSED,
bool check_included)
{
struct pchf_compare_data d;
-
+
if (pchf == NULL
|| (! check_included && ! pchf->have_once_only))
return false;
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 4c044c1..6606746 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -563,9 +563,6 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *);
-extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
-extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
-extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
@@ -582,8 +579,12 @@ extern size_t _cpp_replacement_text_len (const cpp_macro *);
extern cppchar_t _cpp_valid_ucn (cpp_reader *, const uchar **,
const uchar *, int);
extern void _cpp_destroy_iconv (cpp_reader *);
-extern bool _cpp_interpret_string_notranslate (cpp_reader *, const cpp_string *,
+extern bool _cpp_interpret_string_notranslate (cpp_reader *,
+ const cpp_string *,
cpp_string *);
+extern uchar *_cpp_convert_input (cpp_reader *, const char *, uchar *,
+ size_t, size_t, off_t *);
+extern const char *_cpp_default_encoding (void);
/* Utility routines and macros. */
#define DSC(str) (const uchar *)str, sizeof str - 1
diff --git a/gcc/cppinit.c b/gcc/cppinit.c
index ab2331d..d6509fd 100644
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -159,11 +159,11 @@ cpp_create_reader (enum c_lang lang, hash_table *table,
CPP_OPTION (pfile, bytes_big_endian) = 1; /* does not matter */
/* Default to no charset conversion. */
- CPP_OPTION (pfile, narrow_charset) = 0;
+ CPP_OPTION (pfile, narrow_charset) = _cpp_default_encoding ();
CPP_OPTION (pfile, wide_charset) = 0;
- /* Default the input character set to iso-8859-1 for now. */
- CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
+ /* Default the input character set from the locale (see _cpp_default_encoding). */
+ CPP_OPTION (pfile, input_charset) = _cpp_default_encoding ();
/* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we
@@ -579,7 +579,7 @@ read_original_directory (cpp_reader *pfile)
debugdir[token->val.str.len - 4] = '\0';
pfile->cb.dir_change (pfile, debugdir);
- }
+ }
/* We want to process the fake line changes as regular changes, to
get them output. */
@@ -591,7 +591,7 @@ read_original_directory (cpp_reader *pfile)
/* This is called at the end of preprocessing. It pops the last
buffer and writes dependency output, and returns the number of
errors.
-
+
Maybe it should also reset state, such that you could call
cpp_start_read with a new filename to restart processing. */
int
diff --git a/gcc/cpplib.c b/gcc/cpplib.c
index 33b2de6..fa16859 100644
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -1925,7 +1925,6 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
int from_stage3)
{
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
- const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer));
@@ -1937,7 +1936,6 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
new->need_line = true;
pfile->buffer = new;
- _cpp_init_iconv_buffer (pfile, input);
return new;
}
@@ -1960,8 +1958,6 @@ _cpp_pop_buffer (cpp_reader *pfile)
/* In case of a missing #endif. */
pfile->state.skipping = 0;
- _cpp_close_iconv_buffer (pfile);
-
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev;
diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi
index 653e2e7..4649e6ec 100644
--- a/gcc/doc/cppopts.texi
+++ b/gcc/doc/cppopts.texi
@@ -511,6 +511,16 @@ corresponds to the width of @code{wchar_t}. As with
by the system's @code{iconv} library routine; however, you will have
problems with encodings that do not fit exactly in @code{wchar_t}.
+@item -finput-charset=@var{charset}
+@opindex finput-charset
+Set the input character set, used for translation from the character
+set of the input file to the source character set used by GCC. If the
+locale does not specify, or GCC cannot get this information from the
+locale, the default is UTF-8. This can be overridden by either the locale
+or this command line option. Currently the command line option takes
+precedence if there's a conflict. @var{charset} can be any encoding
+supported by the system's @code{iconv} library routine.
+
@item -fworking-directory
@opindex fworking-directory
@opindex fno-working-directory
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 56a7c0c..84a9692 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2004-02-02 Eric Christopher <echristo@redhat.com>
+ Zack Weinberg <zack@codesourcery.com>
+
+ * gcc.c-torture/execute/wchar_t-1.c: Add -finput-charset.
+
2004-02-02 Zack Weinberg <zack@codesourcery.com>
* g++.dg/eh/forced1.C, g++.dg/eh/forced2.C, g++.dg/eh/forced3.C
@@ -20556,5 +20561,3 @@ rlsruhe.de>
correspond to c-torture 1.11.
* New file.
-
-
diff --git a/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c b/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c
index 3efdcf5..7e90cc0 100644
--- a/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c
+++ b/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c
@@ -1,3 +1,4 @@
+/* { dg-options "-finput-charset=utf-8" } */
typedef __WCHAR_TYPE__ wchar_t;
wchar_t x[] = L"Ä";
wchar_t y = L'Ä';