11 files changed, 130 insertions, 118 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 45b7679..740bc21 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2004-02-02  Eric Christopher  <echristo@redhat.com>
+            Zack Weinberg  <zack@codesourcery.com>
+
+	* c-opts.c (c_common_handle_option): Add -finput-charset.
+	* c.opt: Ditto.
+	* cppcharset.c (one_iso88591_to_utf8): Remove.
+	(convert_iso88591_utf8): Ditto.
+	(conversion_tab): Remove 8859-1 converter.
+	(_cpp_input_to_utf8): Remove.
+	(_cpp_init_iconv_buffer): Ditto.
+	(_cpp_close_iconv_buffer): Ditto.
+	(_cpp_convert_input): New function.
+	(_cpp_default_encoding): Ditto.
+	* cpphash.h: Add/remove prototypes for above.
+	* cppfiles.c (read_file_guts): Use _cpp_convert_input.
+	* cppinit.c (cpp_create_reader): Use _cpp_default_encoding
+	for narrow execution and input character sets.
+	* cpplib.c (cpp_push_buffer): Delete uses of removed functions.
+	* doc/cppopts.texi: Document -finput-charset.
+
 2004-02-02  David Edelsohn  <edelsohn@gnu.org>
 
 	* rtlanal.c (refers_to_regno_p): Test regno, not inner_regno,
@@ -86,7 +106,7 @@
 	* postreload.c (reload_combine, reload_combine_note_store,
 	reload_combine_note_use, reload_cse_move2add, move2add_note_store): Likewise.
 	* ra-colorize.c (combine, color_usable_p, get_free_reg,
-	calculate_dont_begin, calculate_dont_begin, colorize_one_web, 
+	calculate_dont_begin, calculate_dont_begin, colorize_one_web,
 	try_recolor_web, insert_coalesced_conflicts, check_colors,
 	break_precolored_alias): Likewise.
 	* ra-debug.c: Include regs.h
@@ -210,7 +230,7 @@
 	constant.
 	(write_node): Simplify comparisons against small constants
 	before printing tests.
-	
+
 2004-01-31  Kazu Hirata  <kazu@cs.umass.edu>
 
 	* config/m32r/m32r.c (m32r_load_pic_register): Use GEN_INT
@@ -435,8 +455,8 @@
 	(mark_constant_pool): Use new pool datastructures.
 
 2004-01-30  Fariborz Jahanian <fjahanian@apple.com>
-        
-        * config/rs6000/rs6000.c (rs6000_emit_move): Remove #if 0. 
+
+        * config/rs6000/rs6000.c (rs6000_emit_move): Remove #if 0.
 	Copy operands[1] to pseudo for simplify_gen_subreg.
 
 2004-01-30  Kazu Hirata  <kazu@cs.umass.edu>
@@ -447,7 +467,7 @@
 2004-01-30  Andrew Pinski <pinskia@physics.uc.edu>
 
 	* toplev.c: Include alloc-pool.h.
-	* Makefile.in (toplev.c): Update dependencies. 
+	* Makefile.in (toplev.c): Update dependencies.
 
 2004-01-30  Richard Kenner  <kenner@vlsi1.ultra.nyu.edu>
 
@@ -533,7 +553,7 @@
 2004-01-29  Devang Patel  <dpatel@apple.com>
 
 	* dwarf2out.c (gen_field_die): Do not equate decl number to die.
-	
+
 2004-01-28  Ian Lance Taylor  <ian@wasabisystems.com>
 
 	PR inline-asm/6162
@@ -682,7 +702,7 @@
 	target macros.
 
 2004-01-28  Daniel Berlin  <dberlin@dberlin.org>
-	
+
 	* timevar.c (timevar_print): Mention when checking is enabled.
 
 2004-01-28  Giovanni Bajo  <giovannibajo@gcc.gnu.org>
@@ -3984,4 +4004,3 @@
 	* invoke.texi (-O1): Document change.
 
 See ChangeLog.10 for earlier changes.
-
diff --git a/gcc/c-opts.c b/gcc/c-opts.c
index 2df0a7e..c89ba00 100644
--- a/gcc/c-opts.c
+++ b/gcc/c-opts.c
@@ -904,6 +904,10 @@ c_common_handle_option (size_t scode, const char *arg, int value)
       cpp_opts->wide_charset = arg;
       break;
 
+    case OPT_finput_charset_:
+      cpp_opts->input_charset = arg;
+      break;
+
     case OPT_ftemplate_depth_:
       max_tinst_depth = value;
       break;
diff --git a/gcc/c.opt b/gcc/c.opt
index 4f9859f..4856fc5 100644
--- a/gcc/c.opt
+++ b/gcc/c.opt
@@ -482,6 +482,11 @@ fexec-charset=
 C ObjC C++ ObjC++ Joined RejectNegative
 -fexec-charset=<cset>	Convert all strings and character constants to character set <cset>
 
+finput-charset=
+C ObjC C++ ObjC++ Joined RejectNegative
+-finput-charset=<cset>	Specify the default character set for source files.
+
+
 fexternal-templates
 C++ ObjC++
 
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 9fc180a..b73128d 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -446,31 +446,6 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
   return 0;
 }
 
-/* The first 256 code points of ISO 8859.1 have the same numeric
-   values as the first 256 code points of Unicode, therefore the
-   incoming ISO 8859.1 character can be passed directly to
-   one_cppchar_to_utf8 (which expects a Unicode value).  */
-
-static int
-one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
-		      size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
-{
-  const uchar *inbuf = *inbufp;
-  int rval;
-
-  if (*inbytesleftp > 1)
-    return EINVAL;
-
-  rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
-  if (rval)
-    return rval;
-
-  *inbufp += 1;
-  *inbytesleftp -= 1;
-
-  return 0;
-}
-
 /* Helper routine for the next few functions.  The 'const' on
    one_conversion means that we promise not to modify what function is
    pointed to, which lets the inliner see through it.  */
@@ -554,14 +529,6 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
   return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
 }
 
-static bool
-convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
-                       struct _cpp_strbuf *to)
-{
-  return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
-}
-
-
 /* Identity conversion, used when we have no alternative.  */
 static bool
 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -639,7 +606,6 @@ static const struct conversion conversion_tab[] = {
   { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
   { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
   { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
-  { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
 };
 
 /* Subroutine of cpp_init_iconv: initialize and return a
@@ -1388,44 +1354,58 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
 }
 
 uchar *
-_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
+		    uchar *input, size_t size, size_t len, off_t *st_size)
 {
-  struct _cpp_strbuf tbuf;
-  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+  struct cset_converter input_cset;
+  struct _cpp_strbuf to;
 
-  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
-  tbuf.text = xmalloc (tbuf.asize);
-  tbuf.len = 0;
+  input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
+  if (input_cset.func == convert_no_conversion)
+    {
+      to.text = input;
+      to.asize = size;
+      to.len = len;
+    }
+  else
+    {
+      to.asize = MAX (65536, len);
+      to.text = xmalloc (to.asize);
+      to.len = 0;
 
-  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
-   {
-      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
-      return NULL;
-   }
+      if (!APPLY_CONVERSION (input_cset, input, len, &to))
+	cpp_error (pfile, CPP_DL_ERROR,
+		   "failure to convert %s to %s",
+		   CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
 
-  if (length)
-    tbuf.text[tbuf.len] = '\n';
-  else
-    tbuf.text[0] = '\n';
+      free (input);
+    }
 
-  return tbuf.text;
-}
+  /* Clean up the mess.  */
+  if (input_cset.func == convert_using_iconv)
+    iconv_close (input_cset.cd);
 
-  /* Check the input file format. At present assuming the input file
-     is in iso-8859-1 format. Convert this input character set to
-     source character set format (UTF-8). */
+  /* Resize buffer if we allocated substantially too much, or if we
+     haven't enough space for the \n-terminator.  */
+  if (to.len + 4096 < to.asize || to.len >= to.asize)
+    to.text = xrealloc (to.text, to.len + 1);
 
-void
-_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
-{
-  pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
-						    from);
+  to.text[to.len] = '\n';
+  *st_size = to.len;
+  return to.text;
 }
 
-void
-_cpp_close_iconv_buffer (cpp_reader *pfile)
+const char *
+_cpp_default_encoding (void)
 {
-  if (HAVE_ICONV
-      && pfile->buffer->input_cset_desc.func == convert_using_iconv)
-    iconv_close (pfile->buffer->input_cset_desc.cd);
+  const char *current_encoding = NULL;
+
+#if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET)
+  setlocale (LC_CTYPE, "");
+  current_encoding = nl_langinfo (CODESET);
+#endif
+  if (current_encoding == NULL || *current_encoding == '\0')
+    current_encoding = SOURCE_CHARSET;
+
+  return current_encoding;
 }
diff --git a/gcc/cppfiles.c b/gcc/cppfiles.c
index cd735f5..c886c89 100644
--- a/gcc/cppfiles.c
+++ b/gcc/cppfiles.c
@@ -312,7 +312,7 @@ pch_open_file (cpp_reader *pfile, _cpp_file *file, bool *invalid_pch)
 
 /* Try to open the path FILE->name appended to FILE->dir.  This is
    where remap and PCH intercept the file lookup process.  Return true
-   if the file was found, whether or not the open was successful.  
+   if the file was found, whether or not the open was successful.
    Set *INVALID_PCH to true if a PCH file is found but wasn't valid.  */
 
 static bool
@@ -398,10 +398,10 @@ _cpp_find_file (cpp_reader *pfile, const char *fname, cpp_dir *start_dir, bool f
 	  open_file_failed (pfile, file);
 	  if (invalid_pch)
 	    {
-	      cpp_error (pfile, CPP_DL_ERROR, 
+	      cpp_error (pfile, CPP_DL_ERROR,
 	       "one or more PCH files were found, but they were invalid");
 	      if (!cpp_get_options (pfile)->warn_invalid_pch)
-		cpp_error (pfile, CPP_DL_ERROR, 
+		cpp_error (pfile, CPP_DL_ERROR,
 			   "use -Winvalid-pch for more information");
 	    }
 	  break;
@@ -457,7 +457,7 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
   ssize_t size, total, count;
   uchar *buf;
   bool regular;
-  
+
   if (S_ISBLK (file->st.st_mode))
     {
       cpp_error (pfile, CPP_DL_ERROR, "%s is a block device", file->path);
@@ -514,15 +514,8 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
     cpp_error (pfile, CPP_DL_WARNING,
 	       "%s is shorter than expected", file->path);
 
-  /* Shrink buffer if we allocated substantially too much.  */
-  if (total + 4096 < size)
-    buf = xrealloc (buf, total + 1);
-
-  /* The lexer requires that the buffer be \n-terminated.  */
-  buf[total] = '\n';
-
-  file->buffer = buf;
-  file->st.st_size = total;
+  file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),
+				     buf, size, total, &file->st.st_size);
   file->buffer_valid = true;
 
   return true;
@@ -566,7 +559,7 @@ should_stack_file (cpp_reader *pfile, _cpp_file *file, bool import)
   if (file->once_only)
     return false;
 
-  /* We must mark the file once-only if #import now, before header 
+  /* We must mark the file once-only if #import now, before header
      guard checks.  Otherwise, undefining the header guard might
      cause the file to be re-stacked.  */
   if (import)
@@ -1283,7 +1276,7 @@ struct pchf_data {
      This is used as an optimisation, it means we don't have to search
      the structure if we're processing a regular #include.  */
   bool have_once_only;
-  
+
   struct pchf_entry {
     /* The size of this file.  This is used to save running a MD5 checksum
        if the sizes don't match.  */
@@ -1298,7 +1291,7 @@ struct pchf_data {
 static struct pchf_data *pchf;
 
 /* Data for pchf_addr.  */
-struct pchf_adder_info 
+struct pchf_adder_info
 {
   cpp_reader *pfile;
   struct pchf_data *d;
@@ -1322,11 +1315,11 @@ pchf_adder (void **slot, void *data)
 	 the PCH file shouldn't be written...  */
       if (f->dont_read || f->err_no)
 	return 1;
-      
+
       d->entries[count].once_only = f->once_only;
       d->have_once_only |= f->once_only;
       if (f->buffer_valid)
-	  md5_buffer ((const char *)f->buffer, 
+	  md5_buffer ((const char *)f->buffer,
 		      f->st.st_size, d->entries[count].sum);
       else
 	{
@@ -1365,22 +1358,22 @@ _cpp_save_file_entries (cpp_reader *pfile, FILE *f)
   struct pchf_data *result;
   size_t result_size;
   struct pchf_adder_info pai;
-  
+
   count = htab_elements (pfile->file_hash);
-  result_size = (sizeof (struct pchf_data) 
+  result_size = (sizeof (struct pchf_data)
 		 + sizeof (struct pchf_entry) * (count - 1));
   result = xcalloc (result_size, 1);
-  
+
   result->count = 0;
   result->have_once_only = false;
-  
+
   pai.pfile = pfile;
   pai.d = result;
   htab_traverse (pfile->file_hash, pchf_adder, &pai);
 
   result_size = (sizeof (struct pchf_data)
                  + sizeof (struct pchf_entry) * (result->count - 1));
-  
+
   qsort (result->entries, result->count, sizeof (struct pchf_entry),
 	 pchf_save_compare);
 
@@ -1393,11 +1386,11 @@ bool
 _cpp_read_file_entries (cpp_reader *pfile ATTRIBUTE_UNUSED, FILE *f)
 {
   struct pchf_data d;
-  
+
   if (fread (&d, sizeof (struct pchf_data) - sizeof (struct pchf_entry), 1, f)
        != 1)
     return false;
-  
+
   pchf = xmalloc (sizeof (struct pchf_data)
 		  + sizeof (struct pchf_entry) * (d.count - 1));
   memcpy (pchf, &d, sizeof (struct pchf_data) - sizeof (struct pchf_entry));
@@ -1422,7 +1415,7 @@ struct pchf_compare_data
 
   /* Do we need to worry about entries that don't have ONCE_ONLY set?  */
   bool check_included;
-  
+
   /* The file that we're searching for.  */
   _cpp_file *f;
 };
@@ -1435,15 +1428,15 @@ pchf_compare (const void *d_p, const void *e_p)
   const struct pchf_entry *e = (const struct pchf_entry *)e_p;
   struct pchf_compare_data *d = (struct pchf_compare_data *)d_p;
   int result;
-  
+
   result = memcmp (&d->size, &e->size, sizeof (off_t));
   if (result != 0)
     return result;
-  
+
   if (! d->sum_computed)
     {
       _cpp_file *const f = d->f;
-      
+
       md5_buffer ((const char *)f->buffer, f->st.st_size, d->sum);
       d->sum_computed = true;
     }
@@ -1458,7 +1451,7 @@ pchf_compare (const void *d_p, const void *e_p)
     return 1;
 }
 
-/* Check that F is not in a list read from a PCH file (if any).  
+/* Check that F is not in a list read from a PCH file (if any).
    Assumes that f->buffer_valid is true.  Return TRUE if the file
    should not be read.  */
 
@@ -1468,7 +1461,7 @@ check_file_against_entries (cpp_reader *pfile ATTRIBUTE_UNUSED,
 			    bool check_included)
 {
   struct pchf_compare_data d;
-  
+
   if (pchf == NULL
       || (! check_included && ! pchf->have_once_only))
     return false;
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 4c044c1..6606746 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -563,9 +563,6 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
 extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
 				 unsigned int, unsigned int);
 extern void _cpp_pop_buffer (cpp_reader *);
-extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
-extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
-extern void _cpp_close_iconv_buffer (cpp_reader *);
 
 /* In cpptrad.c.  */
 extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
@@ -582,8 +579,12 @@ extern size_t _cpp_replacement_text_len (const cpp_macro *);
 extern cppchar_t _cpp_valid_ucn (cpp_reader *, const uchar **,
 				 const uchar *, int);
 extern void _cpp_destroy_iconv (cpp_reader *);
-extern bool _cpp_interpret_string_notranslate (cpp_reader *, const cpp_string *,
+extern bool _cpp_interpret_string_notranslate (cpp_reader *,
+					       const cpp_string *,
 					       cpp_string *);
+extern uchar *_cpp_convert_input (cpp_reader *, const char *, uchar *,
+				  size_t, size_t, off_t *);
+extern const char *_cpp_default_encoding (void);
 
 /* Utility routines and macros.  */
 #define DSC(str) (const uchar *)str, sizeof str - 1
diff --git a/gcc/cppinit.c b/gcc/cppinit.c
index ab2331d..d6509fd 100644
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -159,11 +159,11 @@ cpp_create_reader (enum c_lang lang, hash_table *table,
   CPP_OPTION (pfile, bytes_big_endian) = 1;  /* does not matter */
 
   /* Default to no charset conversion.  */
-  CPP_OPTION (pfile, narrow_charset) = 0;
+  CPP_OPTION (pfile, narrow_charset) = _cpp_default_encoding ();
   CPP_OPTION (pfile, wide_charset) = 0;
 
-  /* Default the input character set to iso-8859-1 for now. */
-  CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
+  /* Default the input character set to the locale's, else SOURCE_CHARSET.  */
+  CPP_OPTION (pfile, input_charset) = _cpp_default_encoding ();
 
   /* A fake empty "directory" used as the starting point for files
      looked up without a search path.  Name cannot be '/' because we
@@ -579,7 +579,7 @@ read_original_directory (cpp_reader *pfile)
       debugdir[token->val.str.len - 4] = '\0';
 
       pfile->cb.dir_change (pfile, debugdir);
-    }      
+    }
 
   /* We want to process the fake line changes as regular changes, to
      get them output.  */
@@ -591,7 +591,7 @@ read_original_directory (cpp_reader *pfile)
 /* This is called at the end of preprocessing.  It pops the last
    buffer and writes dependency output, and returns the number of
    errors.
- 
+
    Maybe it should also reset state, such that you could call
    cpp_start_read with a new filename to restart processing.  */
 int
diff --git a/gcc/cpplib.c b/gcc/cpplib.c
index 33b2de6..fa16859 100644
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -1925,7 +1925,6 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
 		 int from_stage3)
 {
   cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
-  const char *input = CPP_OPTION (pfile, input_charset);
 
   /* Clears, amongst other things, if_stack and mi_cmacro.  */
   memset (new, 0, sizeof (cpp_buffer));
@@ -1937,7 +1936,6 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
   new->need_line = true;
 
   pfile->buffer = new;
-  _cpp_init_iconv_buffer (pfile, input);
 
   return new;
 }
@@ -1960,8 +1958,6 @@ _cpp_pop_buffer (cpp_reader *pfile)
   /* In case of a missing #endif.  */
   pfile->state.skipping = 0;
 
-  _cpp_close_iconv_buffer (pfile);
-
   /* _cpp_do_file_change expects pfile->buffer to be the new one.  */
   pfile->buffer = buffer->prev;
 
diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi
index 653e2e7..4649e6ec 100644
--- a/gcc/doc/cppopts.texi
+++ b/gcc/doc/cppopts.texi
@@ -511,6 +511,16 @@ corresponds to the width of @code{wchar_t}.  As with
 by the system's @code{iconv} library routine; however, you will have
 problems with encodings that do not fit exactly in @code{wchar_t}.
 
+@item -finput-charset=@var{charset}
+@opindex finput-charset
+Set the input character set, used for translation from the character
+set of the input file to the source character set used by GCC. If the
+locale does not specify, or GCC cannot get this information from the
+locale, the default is UTF-8. This can be overridden by either the locale
+or this command line option. Currently the command line option takes
+precedence if there's a conflict. @var{charset} can be any encoding
+supported by the system's @code{iconv} library routine.
+
 @item -fworking-directory
 @opindex fworking-directory
 @opindex fno-working-directory
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 56a7c0c..84a9692 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2004-02-02  Eric Christopher  <echristo@redhat.com>
+            Zack Weinberg  <zack@codesourcery.com>
+
+	* gcc.c-torture/execute/wchar_t-1.c: Add -finput-charset.
+
 2004-02-02  Zack Weinberg  <zack@codesourcery.com>
 
 	* g++.dg/eh/forced1.C, g++.dg/eh/forced2.C, g++.dg/eh/forced3.C
@@ -20556,5 +20561,3 @@ rlsruhe.de>
 	correspond to c-torture	1.11.
 
 	* New file.
-
-
diff --git a/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c b/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c
index 3efdcf5..7e90cc0 100644
--- a/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c
+++ b/gcc/testsuite/gcc.c-torture/execute/wchar_t-1.c
@@ -1,3 +1,4 @@
+/* { dg-options "-finput-charset=utf-8" } */
 typedef __WCHAR_TYPE__ wchar_t;
 wchar_t x[] = L"Ä";
 wchar_t y = L'Ä';