re PR fortran/31645 (Error on reading Byte Order Mark)

PR fortran/31645
* scanner.c (load_file): Discard the byte order mark if one is found on the first non-preprocessor line of a file.
* testsuite/gfortran.dg/bom_error.f90: New test.
* testsuite/gfortran.dg/bom_include.f90: New test.
* testsuite/gfortran.dg/bom_UTF16-LE.f90: New test.
* testsuite/gfortran.dg/bom_UTF16-BE.f90: New test.
* testsuite/gfortran.dg/bom_UTF-8.f90: New test.
* testsuite/gfortran.dg/bom_UTF-32.f90: New test.
* testsuite/gfortran.dg/bom_UTF-8.F90: New test.
* testsuite/gfortran.dg/bom_include.inc: New file.

From-SVN: r124274
author: Francois-Xavier Coudert <fxcoudert@gcc.gnu.org> 2007-04-29 10:45:57 +0000
committer: François-Xavier Coudert <fxcoudert@gcc.gnu.org> 2007-04-29 10:45:57 +0000
commit: caef7872f0dc4a62dfc449785974eaa179b0a449 (patch)
tree: ebf5c3a3515c694847d6fb1e9ca78d6d75adcd53 /gcc/fortran/scanner.c
parent: 70ec16f7c29de16e0ec824de9a8979ff0a902488 (diff)
download: gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.zip
gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.tar.gz
gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.tar.bz2
1 files changed, 27 insertions, 0 deletions
diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
index c3d3e62..5eaa34f 100644
--- a/gcc/fortran/scanner.c
+++ b/gcc/fortran/scanner.c
@@ -1404,6 +1404,7 @@ load_file (const char *filename, bool initial)
   gfc_file *f;
   FILE *input;
   int len, line_len;
+  bool first_line;
 
   for (f = current_file; f; f = f->up)
     if (strcmp (filename, f->filename) == 0)
@@ -1445,6 +1446,7 @@ load_file (const char *filename, bool initial)
   current_file->line = 1;
   line = NULL;
   line_len = 0;
+  first_line = true;
 
   if (initial && gfc_src_preprocessor_lines[0])
     {
@@ -1467,6 +1469,26 @@ load_file (const char *filename, bool initial)
       if (feof (input) && len == 0)
 	break;
 
+      /* If this is the first line of the file, it can contain a byte
+	 order mark (BOM), which we will ignore:
+	   FF FE is UTF-16 little endian,
+	   FE FF is UTF-16 big endian,
+	   EF BB BF is UTF-8.  */
+      if (first_line
+	  && ((line_len >= 2 && line[0] == '\xFF' && line[1] == '\xFE')
+	      || (line_len >= 2 && line[0] == '\xFE' && line[1] == '\xFF')
+	      || (line_len >= 3 && line[0] == '\xEF' && line[1] == '\xBB'
+		  && line[2] == '\xBF')))
+	{
+	  int n = line[1] == '\xBB' ? 3 : 2;
+	  char * new = gfc_getmem (line_len);
+
+	  strcpy (new, line + n);
+	  gfc_free (line);
+	  line = new;
+	  len -= n;
+	}
+
       /* There are three things this line can be: a line of Fortran
 	 source, an include line or a C preprocessor directive.  */
 
@@ -1476,6 +1498,11 @@ load_file (const char *filename, bool initial)
 	  continue;
 	}
 
+      /* Preprocessed files have preprocessor lines added before the byte
+         order mark, so first_line is not about the first line of the file
+	 but the first line that's not a preprocessor line.  */
+      first_line = false;
+
       if (include_line (line))
 	{
 	  current_file->line++;
author	Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>	2007-04-29 10:45:57 +0000
committer	François-Xavier Coudert <fxcoudert@gcc.gnu.org>	2007-04-29 10:45:57 +0000
commit	caef7872f0dc4a62dfc449785974eaa179b0a449 (patch)
tree	ebf5c3a3515c694847d6fb1e9ca78d6d75adcd53 /gcc/fortran/scanner.c
parent	70ec16f7c29de16e0ec824de9a8979ff0a902488 (diff)
download	gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.zip gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.tar.gz gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.tar.bz2