diff options
author | Francois-Xavier Coudert <fxcoudert@gcc.gnu.org> | 2007-04-29 10:45:57 +0000 |
---|---|---|
committer | François-Xavier Coudert <fxcoudert@gcc.gnu.org> | 2007-04-29 10:45:57 +0000 |
commit | caef7872f0dc4a62dfc449785974eaa179b0a449 (patch) | |
tree | ebf5c3a3515c694847d6fb1e9ca78d6d75adcd53 /gcc/fortran/scanner.c | |
parent | 70ec16f7c29de16e0ec824de9a8979ff0a902488 (diff) | |
download | gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.zip gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.tar.gz gcc-caef7872f0dc4a62dfc449785974eaa179b0a449.tar.bz2 |
re PR fortran/31645 (Error on reading Byte Order Mark)
PR fortran/31645
* scanner.c (load_file): Discard the byte order mark if one is
found on the first non-preprocessor line of a file.
* testsuite/gfortran.dg/bom_error.f90: New test.
* testsuite/gfortran.dg/bom_include.f90: New test.
* testsuite/gfortran.dg/bom_UTF16-LE.f90: New test.
* testsuite/gfortran.dg/bom_UTF16-BE.f90: New test.
* testsuite/gfortran.dg/bom_UTF-8.f90: New test.
* testsuite/gfortran.dg/bom_UTF-32.f90: New test.
* testsuite/gfortran.dg/bom_UTF-8.F90: New test.
* testsuite/gfortran.dg/bom_include.inc: New file.
From-SVN: r124274
Diffstat (limited to 'gcc/fortran/scanner.c')
-rw-r--r-- | gcc/fortran/scanner.c | 27 |
1 file changed, 27 insertions, 0 deletions
```diff
diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
index c3d3e62..5eaa34f 100644
--- a/gcc/fortran/scanner.c
+++ b/gcc/fortran/scanner.c
@@ -1404,6 +1404,7 @@ load_file (const char *filename, bool initial)
   gfc_file *f;
   FILE *input;
   int len, line_len;
+  bool first_line;
 
   for (f = current_file; f; f = f->up)
     if (strcmp (filename, f->filename) == 0)
@@ -1445,6 +1446,7 @@ load_file (const char *filename, bool initial)
   current_file->line = 1;
   line = NULL;
   line_len = 0;
+  first_line = true;
 
   if (initial && gfc_src_preprocessor_lines[0])
     {
@@ -1467,6 +1469,26 @@ load_file (const char *filename, bool initial)
       if (feof (input) && len == 0)
         break;
 
+      /* If this is the first line of the file, it can contain a byte
+         order mark (BOM), which we will ignore:
+           FF FE is UTF-16 little endian,
+           FE FF is UTF-16 big endian,
+           EF BB BF is UTF-8. */
+      if (first_line
+          && ((line_len >= 2 && line[0] == '\xFF' && line[1] == '\xFE')
+              || (line_len >= 2 && line[0] == '\xFE' && line[1] == '\xFF')
+              || (line_len >= 3 && line[0] == '\xEF' && line[1] == '\xBB'
+                  && line[2] == '\xBF')))
+        {
+          int n = line[1] == '\xBB' ? 3 : 2;
+          char * new = gfc_getmem (line_len);
+
+          strcpy (new, line + n);
+          gfc_free (line);
+          line = new;
+          len -= n;
+        }
+
       /* There are three things this line can be: a line of Fortran
          source, an include line or a C preprocessor directive. */
 
@@ -1476,6 +1498,11 @@ load_file (const char *filename, bool initial)
           continue;
         }
 
+      /* Preprocessed files have preprocessor lines added before the byte
+         order mark, so first_line is not about the first line of the file
+         but the first line that's not a preprocessor line. */
+      first_line = false;
+
       if (include_line (line))
         {
           current_file->line++;
```